1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX2,AVX2-SLOW
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX2,AVX2-FAST
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX2,AVX2-FAST
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX512VL,AVX512VL-SLOW
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX512VL,AVX512VL-FAST
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX512VL,AVX512VL-FAST
12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=AVX,XOP,XOPAVX1
13 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=AVX,XOP,XOPAVX2
; Word mask <0,1,0,1,2,3,2,3>: each of the two low dwords of %a is duplicated.
; Every index is below 8, so %b is never selected. The checks expect a dword
; shuffle (pshufd, or vshufps under the AVX prefix).
15 define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
16 ; SSE-LABEL: shuffle_v8i16_01012323:
18 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
21 ; AVX-LABEL: shuffle_v8i16_01012323:
23 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,1]
25 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
26 ret <8 x i16> %shuffle
; Word mask <6,7,4,5,2,3,0,1>: reverses the order of the four dwords of %a
; while keeping the two words inside each dword in place. %b is never selected.
; The checks expect a dword shuffle with indices [3,2,1,0].
28 define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
29 ; SSE-LABEL: shuffle_v8i16_67452301:
31 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
34 ; AVX-LABEL: shuffle_v8i16_67452301:
36 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
38 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
39 ret <8 x i16> %shuffle
; Word mask <4..11>: the high four words of %a followed by the low four words
; of %b (indices 8-11 select elements 0-3 of the second operand).
; The SSE2 checks expect shufps; the SSSE3, SSE41 and AVX checks expect a
; byte-align instruction (palignr / vpalignr).
41 define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
42 ; SSE2-LABEL: shuffle_v8i16_456789AB:
44 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
47 ; SSSE3-LABEL: shuffle_v8i16_456789AB:
49 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
50 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
53 ; SSE41-LABEL: shuffle_v8i16_456789AB:
55 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
56 ; SSE41-NEXT: movdqa %xmm1, %xmm0
59 ; AVX-LABEL: shuffle_v8i16_456789AB:
61 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
63 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
64 ret <8 x i16> %shuffle
; All-zero word mask: splats word 0 of %a into every result element.
; The SSE, AVX1 and XOPAVX1 checks expect pshuflw followed by pshufd; the
; AVX2OR512VL and XOPAVX2 checks expect vpbroadcastw.
67 define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
68 ; SSE-LABEL: shuffle_v8i16_00000000:
70 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
71 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
74 ; AVX1-LABEL: shuffle_v8i16_00000000:
76 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
77 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
80 ; AVX2OR512VL-LABEL: shuffle_v8i16_00000000:
81 ; AVX2OR512VL: # %bb.0:
82 ; AVX2OR512VL-NEXT: vpbroadcastw %xmm0, %xmm0
83 ; AVX2OR512VL-NEXT: retq
85 ; XOPAVX1-LABEL: shuffle_v8i16_00000000:
87 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
88 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
91 ; XOPAVX2-LABEL: shuffle_v8i16_00000000:
93 ; XOPAVX2-NEXT: vpbroadcastw %xmm0, %xmm0
95 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
96 ret <8 x i16> %shuffle
; Word mask <0,0,0,0,4,4,4,4>: splats word 0 across the low half and word 4
; across the high half of %a. %b is never selected.
; Most prefixes expect pshuflw + pshufhw; the AVX2-FAST and AVX512VL-FAST
; checks expect vpshufb instead.
98 define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
99 ; SSE-LABEL: shuffle_v8i16_00004444:
101 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
102 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
105 ; AVX1-LABEL: shuffle_v8i16_00004444:
107 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
108 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
111 ; AVX2-SLOW-LABEL: shuffle_v8i16_00004444:
112 ; AVX2-SLOW: # %bb.0:
113 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
114 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
115 ; AVX2-SLOW-NEXT: retq
117 ; AVX2-FAST-LABEL: shuffle_v8i16_00004444:
118 ; AVX2-FAST: # %bb.0:
119 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9]
120 ; AVX2-FAST-NEXT: retq
122 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_00004444:
123 ; AVX512VL-SLOW: # %bb.0:
124 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
125 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
126 ; AVX512VL-SLOW-NEXT: retq
128 ; AVX512VL-FAST-LABEL: shuffle_v8i16_00004444:
129 ; AVX512VL-FAST: # %bb.0:
130 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9]
131 ; AVX512VL-FAST-NEXT: retq
133 ; XOP-LABEL: shuffle_v8i16_00004444:
135 ; XOP-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
136 ; XOP-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
138 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
139 ret <8 x i16> %shuffle
; Word mask <u,0,u,1,u,2,u,3>: odd result words take words 0-3 of %a and the
; even result words are undef. The checks expect an unpack-low of xmm0 with
; itself (punpcklwd / vpunpcklwd).
141 define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) {
142 ; SSE-LABEL: shuffle_v8i16_u0u1u2u3:
144 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
147 ; AVX-LABEL: shuffle_v8i16_u0u1u2u3:
149 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
151 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
152 ret <8 x i16> %shuffle
; Word mask <u,4,u,5,u,6,u,7>: odd result words take words 4-7 of %a and the
; even result words are undef. The checks expect an unpack-high of xmm0 with
; itself (punpckhwd / vpunpckhwd).
154 define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) {
155 ; SSE-LABEL: shuffle_v8i16_u4u5u6u7:
157 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
160 ; AVX-LABEL: shuffle_v8i16_u4u5u6u7:
162 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
164 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
165 ret <8 x i16> %shuffle
; Word mask <3,1,2,0,6,7,4,5>: swaps words 0 and 3 in the low half and swaps
; the two dwords of the high half of %a. %b is never selected.
; Most prefixes expect pshuflw + pshufd; the AVX2-FAST and AVX512VL-FAST
; checks expect vpshufb instead.
167 define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
168 ; SSE-LABEL: shuffle_v8i16_31206745:
170 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
171 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
174 ; AVX1-LABEL: shuffle_v8i16_31206745:
176 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
177 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
180 ; AVX2-SLOW-LABEL: shuffle_v8i16_31206745:
181 ; AVX2-SLOW: # %bb.0:
182 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
183 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
184 ; AVX2-SLOW-NEXT: retq
186 ; AVX2-FAST-LABEL: shuffle_v8i16_31206745:
187 ; AVX2-FAST: # %bb.0:
188 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
189 ; AVX2-FAST-NEXT: retq
191 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_31206745:
192 ; AVX512VL-SLOW: # %bb.0:
193 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
194 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
195 ; AVX512VL-SLOW-NEXT: retq
197 ; AVX512VL-FAST-LABEL: shuffle_v8i16_31206745:
198 ; AVX512VL-FAST: # %bb.0:
199 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
200 ; AVX512VL-FAST-NEXT: retq
202 ; XOP-LABEL: shuffle_v8i16_31206745:
204 ; XOP-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
205 ; XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
207 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
208 ret <8 x i16> %shuffle
; Word mask <4,4,4,4,0,0,0,0>: splats word 4 across the low half and word 0
; across the high half of %a. %b is never selected.
; The SSE2 checks expect pshufd + pshuflw + pshufhw; the SSSE3, SSE41 and AVX
; checks expect a byte shuffle (pshufb / vpshufb).
210 define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
211 ; SSE2-LABEL: shuffle_v8i16_44440000:
213 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
214 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
215 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
218 ; SSSE3-LABEL: shuffle_v8i16_44440000:
220 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
223 ; SSE41-LABEL: shuffle_v8i16_44440000:
225 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
228 ; AVX-LABEL: shuffle_v8i16_44440000:
230 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
232 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
233 ret <8 x i16> %shuffle
; Word mask <2,3,0,1,6,7,4,5>: swaps the two dwords inside each 64-bit half
; of %a. %b is never selected. The checks expect a dword shuffle with indices
; [1,0,3,2].
235 define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) {
236 ; SSE-LABEL: shuffle_v8i16_23016745:
238 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
241 ; AVX-LABEL: shuffle_v8i16_23016745:
243 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,0,3,2]
245 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
246 ret <8 x i16> %shuffle
; Word mask <2,3,0,2,6,7,4,5>: the low half becomes words <2,3,0,2> and the
; high half has its two dwords swapped. %b is never selected.
; Most prefixes expect pshuflw + pshufd; the AVX2-FAST and AVX512VL-FAST
; checks expect vpshufb instead.
248 define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
249 ; SSE-LABEL: shuffle_v8i16_23026745:
251 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
252 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
255 ; AVX1-LABEL: shuffle_v8i16_23026745:
257 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
258 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
261 ; AVX2-SLOW-LABEL: shuffle_v8i16_23026745:
262 ; AVX2-SLOW: # %bb.0:
263 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
264 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
265 ; AVX2-SLOW-NEXT: retq
267 ; AVX2-FAST-LABEL: shuffle_v8i16_23026745:
268 ; AVX2-FAST: # %bb.0:
269 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
270 ; AVX2-FAST-NEXT: retq
272 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_23026745:
273 ; AVX512VL-SLOW: # %bb.0:
274 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
275 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
276 ; AVX512VL-SLOW-NEXT: retq
278 ; AVX512VL-FAST-LABEL: shuffle_v8i16_23026745:
279 ; AVX512VL-FAST: # %bb.0:
280 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
281 ; AVX512VL-FAST-NEXT: retq
283 ; XOP-LABEL: shuffle_v8i16_23026745:
285 ; XOP-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
286 ; XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
288 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
289 ret <8 x i16> %shuffle
; Word mask <2,3,0,1,6,7,4,7>: the low half has its two dwords swapped and the
; high half becomes words <6,7,4,7>. %b is never selected.
; Most prefixes expect pshufd + pshufhw; the AVX2-FAST and AVX512VL-FAST
; checks expect vpshufb instead.
291 define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
292 ; SSE-LABEL: shuffle_v8i16_23016747:
294 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
295 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
298 ; AVX1-LABEL: shuffle_v8i16_23016747:
300 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
301 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
304 ; AVX2-SLOW-LABEL: shuffle_v8i16_23016747:
305 ; AVX2-SLOW: # %bb.0:
306 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
307 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
308 ; AVX2-SLOW-NEXT: retq
310 ; AVX2-FAST-LABEL: shuffle_v8i16_23016747:
311 ; AVX2-FAST: # %bb.0:
312 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15]
313 ; AVX2-FAST-NEXT: retq
315 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_23016747:
316 ; AVX512VL-SLOW: # %bb.0:
317 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
318 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
319 ; AVX512VL-SLOW-NEXT: retq
321 ; AVX512VL-FAST-LABEL: shuffle_v8i16_23016747:
322 ; AVX512VL-FAST: # %bb.0:
323 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15]
324 ; AVX512VL-FAST-NEXT: retq
326 ; XOP-LABEL: shuffle_v8i16_23016747:
328 ; XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
329 ; XOP-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
331 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
332 ret <8 x i16> %shuffle
; Word mask <7,5,6,4,3,1,2,0>: the two halves of %a trade places and each half
; is permuted with the in-half pattern <3,1,2,0>. %b is never selected.
; The SSE2 checks expect pshufd + pshuflw + pshufhw; the SSSE3, SSE41 and AVX
; checks expect a byte shuffle (pshufb / vpshufb).
334 define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
335 ; SSE2-LABEL: shuffle_v8i16_75643120:
337 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
338 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
339 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
342 ; SSSE3-LABEL: shuffle_v8i16_75643120:
344 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
347 ; SSE41-LABEL: shuffle_v8i16_75643120:
349 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
352 ; AVX-LABEL: shuffle_v8i16_75643120:
354 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
356 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
357 ret <8 x i16> %shuffle
; Word mask <1,0,5,4,5,4,1,0>: only words 0, 1, 4 and 5 of %a (its dwords 0
; and 2) are read, always as a word-swapped pair. %b is never selected.
; The SSE2 checks expect pshufd + pshuflw + pshufhw; the SSSE3, SSE41 and AVX
; checks expect a byte shuffle (pshufb / vpshufb).
360 define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
361 ; SSE2-LABEL: shuffle_v8i16_10545410:
363 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
364 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
365 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
368 ; SSSE3-LABEL: shuffle_v8i16_10545410:
370 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
373 ; SSE41-LABEL: shuffle_v8i16_10545410:
375 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
378 ; AVX-LABEL: shuffle_v8i16_10545410:
380 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
382 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
383 ret <8 x i16> %shuffle
; Word mask <5,4,1,0,5,4,1,0>: both result halves are words <5,4,1,0> of %a.
; %b is never selected.
; The SSE2 checks expect pshufd + pshuflw + pshufhw; the SSSE3, SSE41 and AVX
; checks expect a byte shuffle (pshufb / vpshufb).
385 define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
386 ; SSE2-LABEL: shuffle_v8i16_54105410:
388 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
389 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
390 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
393 ; SSSE3-LABEL: shuffle_v8i16_54105410:
395 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
398 ; SSE41-LABEL: shuffle_v8i16_54105410:
400 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
403 ; AVX-LABEL: shuffle_v8i16_54105410:
405 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
407 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
408 ret <8 x i16> %shuffle
; Word mask <5,4,1,0,1,0,5,4>: the low half is words <5,4,1,0> of %a and the
; high half is words <1,0,5,4>. %b is never selected.
; The SSE2 checks expect pshufd + pshuflw + pshufhw; the SSSE3, SSE41 and AVX
; checks expect a byte shuffle (pshufb / vpshufb).
410 define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
411 ; SSE2-LABEL: shuffle_v8i16_54101054:
413 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
414 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
415 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
418 ; SSSE3-LABEL: shuffle_v8i16_54101054:
420 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
423 ; SSE41-LABEL: shuffle_v8i16_54101054:
425 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
428 ; AVX-LABEL: shuffle_v8i16_54101054:
430 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
432 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
433 ret <8 x i16> %shuffle
; Word mask <0,4,4,0,0,4,4,0>: both result halves are words <0,4,4,0> of %a,
; so only words 0 and 4 are read. %b is never selected.
; The SSE2 checks expect pshufd + pshuflw + pshufhw; the SSSE3, SSE41 and AVX
; checks expect a byte shuffle (pshufb / vpshufb).
435 define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
436 ; SSE2-LABEL: shuffle_v8i16_04400440:
438 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
439 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
440 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
443 ; SSSE3-LABEL: shuffle_v8i16_04400440:
445 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
448 ; SSE41-LABEL: shuffle_v8i16_04400440:
450 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
453 ; AVX-LABEL: shuffle_v8i16_04400440:
455 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
457 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
458 ret <8 x i16> %shuffle
; Word mask <4,0,0,4,4,0,0,4>: both result halves are words <4,0,0,4> of %a,
; so only words 0 and 4 are read. %b is never selected.
; The SSE2 checks expect pshufd + pshuflw + pshufhw; the SSSE3, SSE41 and AVX
; checks expect a byte shuffle (pshufb / vpshufb).
460 define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
461 ; SSE2-LABEL: shuffle_v8i16_40044004:
463 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
464 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
465 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
468 ; SSSE3-LABEL: shuffle_v8i16_40044004:
470 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
473 ; SSE41-LABEL: shuffle_v8i16_40044004:
475 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
478 ; AVX-LABEL: shuffle_v8i16_40044004:
480 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
482 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
483 ret <8 x i16> %shuffle
; Word mask <2,6,4,0,5,1,7,3>: a permutation that uses every word of %a exactly
; once and moves words across the two halves. %b is never selected.
; The SSE2 checks expect a five-instruction pshuflw/pshufhw/pshufd chain; the
; SSSE3, SSE41 and AVX checks expect a byte shuffle (pshufb / vpshufb).
486 define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
487 ; SSE2-LABEL: shuffle_v8i16_26405173:
489 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
490 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
491 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
492 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
493 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
496 ; SSSE3-LABEL: shuffle_v8i16_26405173:
498 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
501 ; SSE41-LABEL: shuffle_v8i16_26405173:
503 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
506 ; AVX-LABEL: shuffle_v8i16_26405173:
508 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
510 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
511 ret <8 x i16> %shuffle
; Word mask <2,0,6,4,5,1,7,3>: a permutation that uses every word of %a exactly
; once and moves words across the two halves. %b is never selected.
; The SSE2 checks expect a five-instruction pshuflw/pshufhw/pshufd chain; the
; SSSE3, SSE41 and AVX checks expect a byte shuffle (pshufb / vpshufb).
513 define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
514 ; SSE2-LABEL: shuffle_v8i16_20645173:
516 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
517 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
518 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
519 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
520 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
523 ; SSSE3-LABEL: shuffle_v8i16_20645173:
525 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
528 ; SSE41-LABEL: shuffle_v8i16_20645173:
530 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
533 ; AVX-LABEL: shuffle_v8i16_20645173:
535 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
537 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
538 ret <8 x i16> %shuffle
; Word mask <2,6,4,0,1,3,7,5>: a permutation that uses every word of %a exactly
; once and moves words across the two halves. %b is never selected.
; The SSE2 checks expect a four-instruction pshuflw/pshufhw/pshufd/pshuflw
; chain; the SSSE3, SSE41 and AVX checks expect a byte shuffle.
540 define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
541 ; SSE2-LABEL: shuffle_v8i16_26401375:
543 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
544 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
545 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
546 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
549 ; SSSE3-LABEL: shuffle_v8i16_26401375:
551 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
554 ; SSE41-LABEL: shuffle_v8i16_26401375:
556 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
559 ; AVX-LABEL: shuffle_v8i16_26401375:
561 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
563 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
564 ret <8 x i16> %shuffle
; Word mask <6,6,7,5,1,6,4,3>: word 6 of %a is read three times while words 0
; and 2 are not read at all. %b is never selected.
; The SSE2 checks expect a five-instruction pshuflw/pshufhw/pshufd chain; the
; SSSE3, SSE41 and AVX checks expect a byte shuffle (pshufb / vpshufb).
567 define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) {
568 ; SSE2-LABEL: shuffle_v8i16_66751643:
570 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
571 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
572 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
573 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
574 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
577 ; SSSE3-LABEL: shuffle_v8i16_66751643:
579 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
582 ; SSE41-LABEL: shuffle_v8i16_66751643:
584 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
587 ; AVX-LABEL: shuffle_v8i16_66751643:
589 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
591 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3>
592 ret <8 x i16> %shuffle
; Word mask <6,0,5,1,4,7,5,4> applied to %a. Unlike the neighbouring tests, the
; second shufflevector operand is a literal undef; %b is an unused parameter.
; The SSE2 checks expect a four-instruction pshufhw/pshufd/pshuflw/pshufhw
; chain; the SSSE3, SSE41 and AVX checks expect a byte shuffle.
595 define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) {
596 ; SSE2-LABEL: shuffle_v8i16_60514754:
598 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
599 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
600 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
601 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
604 ; SSSE3-LABEL: shuffle_v8i16_60514754:
606 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
609 ; SSE41-LABEL: shuffle_v8i16_60514754:
611 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
614 ; AVX-LABEL: shuffle_v8i16_60514754:
616 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
618 %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4>
619 ret <8 x i16> %shuffle
; Word mask <0,0,4,4,4,4,4,4>: result words 0-1 take word 0 of %a and the
; remaining six take word 4. %b is never selected.
; The SSE2 checks expect pshufd + pshuflw + pshufhw; the SSSE3, SSE41 and AVX
; checks expect a byte shuffle (pshufb / vpshufb).
622 define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
623 ; SSE2-LABEL: shuffle_v8i16_00444444:
625 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
626 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
627 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
630 ; SSSE3-LABEL: shuffle_v8i16_00444444:
632 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
635 ; SSE41-LABEL: shuffle_v8i16_00444444:
637 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
640 ; AVX-LABEL: shuffle_v8i16_00444444:
642 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
644 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
645 ret <8 x i16> %shuffle
; Word mask <4,4,0,0,4,4,4,4>: result words 2-3 take word 0 of %a and every
; other result word takes word 4. %b is never selected.
; The SSE2 checks expect pshufd + pshuflw + pshufhw; the SSSE3, SSE41 and AVX
; checks expect a byte shuffle (pshufb / vpshufb).
647 define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
648 ; SSE2-LABEL: shuffle_v8i16_44004444:
650 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
651 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
652 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
655 ; SSSE3-LABEL: shuffle_v8i16_44004444:
657 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
660 ; SSE41-LABEL: shuffle_v8i16_44004444:
662 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
665 ; AVX-LABEL: shuffle_v8i16_44004444:
667 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
669 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
670 ret <8 x i16> %shuffle
; Word mask <0,4,4,0,4,4,4,4>: the low half is words <0,4,4,0> of %a and the
; high half is a splat of word 4. %b is never selected.
; The SSE2 checks expect pshufd + pshuflw + pshufhw; the SSSE3, SSE41 and AVX
; checks expect a byte shuffle (pshufb / vpshufb).
672 define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
673 ; SSE2-LABEL: shuffle_v8i16_04404444:
675 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
676 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
677 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
680 ; SSSE3-LABEL: shuffle_v8i16_04404444:
682 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
685 ; SSE41-LABEL: shuffle_v8i16_04404444:
687 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
690 ; AVX-LABEL: shuffle_v8i16_04404444:
692 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
694 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
695 ret <8 x i16> %shuffle
; Word mask <0,4,4,0,0,0,0,0>: the low half is words <0,4,4,0> of %a and the
; high half is a splat of word 0. %b is never selected.
; The SSE2 checks expect pshufd + pshuflw + pshufhw; the SSSE3, SSE41 and AVX
; checks expect a byte shuffle (pshufb / vpshufb).
697 define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
698 ; SSE2-LABEL: shuffle_v8i16_04400000:
700 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
701 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
702 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
705 ; SSSE3-LABEL: shuffle_v8i16_04400000:
707 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
710 ; SSE41-LABEL: shuffle_v8i16_04400000:
712 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
715 ; AVX-LABEL: shuffle_v8i16_04400000:
717 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
719 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
720 ret <8 x i16> %shuffle
; Word mask <0,4,4,0,4,5,6,7>: the low half is words <0,4,4,0> of %a while the
; high half of %a passes through unchanged. %b is never selected.
; Most prefixes expect pshufd + pshuflw; the AVX2-FAST and AVX512VL-FAST
; checks expect vpshufb instead.
722 define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
723 ; SSE-LABEL: shuffle_v8i16_04404567:
725 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
726 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
729 ; AVX1-LABEL: shuffle_v8i16_04404567:
731 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
732 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
735 ; AVX2-SLOW-LABEL: shuffle_v8i16_04404567:
736 ; AVX2-SLOW: # %bb.0:
737 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
738 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
739 ; AVX2-SLOW-NEXT: retq
741 ; AVX2-FAST-LABEL: shuffle_v8i16_04404567:
742 ; AVX2-FAST: # %bb.0:
743 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
744 ; AVX2-FAST-NEXT: retq
746 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_04404567:
747 ; AVX512VL-SLOW: # %bb.0:
748 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
749 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
750 ; AVX512VL-SLOW-NEXT: retq
752 ; AVX512VL-FAST-LABEL: shuffle_v8i16_04404567:
753 ; AVX512VL-FAST: # %bb.0:
754 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
755 ; AVX512VL-FAST-NEXT: retq
757 ; XOP-LABEL: shuffle_v8i16_04404567:
759 ; XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
760 ; XOP-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
762 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
763 ret <8 x i16> %shuffle
; Word mask <0,u,4,4,4,4,4,4>: same shape as the 00444444 mask but with result
; word 1 left undef. %b is never selected.
; The SSE2 checks expect pshufd + pshuflw + pshufhw; the SSSE3, SSE41 and AVX
; checks expect a byte shuffle (pshufb / vpshufb).
766 define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
767 ; SSE2-LABEL: shuffle_v8i16_0X444444:
769 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
770 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
771 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
774 ; SSSE3-LABEL: shuffle_v8i16_0X444444:
776 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
779 ; SSE41-LABEL: shuffle_v8i16_0X444444:
781 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
784 ; AVX-LABEL: shuffle_v8i16_0X444444:
786 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
788 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
789 ret <8 x i16> %shuffle
; Word mask <4,4,u,0,4,4,4,4>: same shape as the 44004444 mask but with result
; word 2 left undef. %b is never selected.
; The SSE2 checks expect pshufd + pshuflw + pshufhw; the SSSE3, SSE41 and AVX
; checks expect a byte shuffle (pshufb / vpshufb).
791 define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
792 ; SSE2-LABEL: shuffle_v8i16_44X04444:
794 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
795 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
796 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
799 ; SSSE3-LABEL: shuffle_v8i16_44X04444:
801 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
804 ; SSE41-LABEL: shuffle_v8i16_44X04444:
806 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
809 ; AVX-LABEL: shuffle_v8i16_44X04444:
811 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
813 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
814 ret <8 x i16> %shuffle
; Word mask <u,4,4,0,4,4,4,4>: same shape as the 04404444 mask but with result
; word 0 left undef. %b is never selected. The expected instructions shown
; here are the same as for the fully-defined 04404444 mask.
816 define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
817 ; SSE2-LABEL: shuffle_v8i16_X4404444:
819 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
820 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
821 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
824 ; SSSE3-LABEL: shuffle_v8i16_X4404444:
826 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
829 ; SSE41-LABEL: shuffle_v8i16_X4404444:
831 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
834 ; AVX-LABEL: shuffle_v8i16_X4404444:
836 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
838 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
839 ret <8 x i16> %shuffle
; Word mask <0,1,2,7,u,u,u,u>: the low half is words <0,1,2,7> of %a (word 7
; crosses from the high half) and the high half is undef. %b is never selected.
; The SSE2 checks expect pshufd + pshufhw + pshufd; the SSSE3, SSE41 and AVX
; checks expect a byte shuffle (pshufb / vpshufb).
842 define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
843 ; SSE2-LABEL: shuffle_v8i16_0127XXXX:
845 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
846 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
847 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
850 ; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
852 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
855 ; SSE41-LABEL: shuffle_v8i16_0127XXXX:
857 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
860 ; AVX-LABEL: shuffle_v8i16_0127XXXX:
862 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
864 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
865 ret <8 x i16> %shuffle
; Word mask <u,u,u,u,4,5,6,3>: the high half is words <4,5,6,3> of %a (word 3
; crosses from the low half) and the low half is undef. %b is never selected.
; The SSE2 checks expect pshufd + pshuflw + pshufd; the SSSE3, SSE41 and AVX
; checks expect a byte shuffle (pshufb / vpshufb).
868 define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
869 ; SSE2-LABEL: shuffle_v8i16_XXXX4563:
871 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
872 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
873 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
876 ; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
878 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
881 ; SSE41-LABEL: shuffle_v8i16_XXXX4563:
883 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
886 ; AVX-LABEL: shuffle_v8i16_XXXX4563:
888 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
890 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
891 ret <8 x i16> %shuffle
; Word mask <4,5,6,3,u,u,u,u>: the low half is words <4,5,6,3> of %a and the
; high half is undef. %b is never selected.
; The SSE2 checks expect pshufd + pshuflw + pshufd; the SSSE3, SSE41 and AVX
; checks expect a byte shuffle (pshufb / vpshufb).
894 define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
895 ; SSE2-LABEL: shuffle_v8i16_4563XXXX:
897 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
898 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
899 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
902 ; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
904 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
907 ; SSE41-LABEL: shuffle_v8i16_4563XXXX:
909 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
912 ; AVX-LABEL: shuffle_v8i16_4563XXXX:
914 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
916 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
917 ret <8 x i16> %shuffle
; Single-input shuffle of %a that swaps elements 3 and 7 and leaves every other
; element in place (mask 0,1,2,7,4,5,6,3). %b is not referenced by the mask.
; The SSE2 run expects pshufd/pshufhw/pshufd; later runs expect one byte shuffle.
920 define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
921 ; SSE2-LABEL: shuffle_v8i16_01274563:
923 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
924 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
925 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
928 ; SSSE3-LABEL: shuffle_v8i16_01274563:
930 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
933 ; SSE41-LABEL: shuffle_v8i16_01274563:
935 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
938 ; AVX-LABEL: shuffle_v8i16_01274563:
940 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
942 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
943 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask 4,5,6,3,0,1,2,7, i.e. the two 64-bit
; halves are exchanged except that elements 3 and 7 stay in lanes 3 and 7.
; %b is not referenced by the mask.
; The SSE2 run expects pshufd/pshuflw/pshufd; later runs expect one byte shuffle.
946 define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
947 ; SSE2-LABEL: shuffle_v8i16_45630127:
949 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
950 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
951 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
954 ; SSSE3-LABEL: shuffle_v8i16_45630127:
956 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
959 ; SSE41-LABEL: shuffle_v8i16_45630127:
961 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
964 ; AVX-LABEL: shuffle_v8i16_45630127:
966 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
968 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
969 ret <8 x i16> %shuffle
; Single-input shuffle of %a with an irregular mask (3,7,1,0,2,7,3,5) that
; repeats elements 3 and 7 and mixes both 64-bit halves in each output half.
; %b is not referenced by the mask.
; The SSE2 run expects a six-instruction pshufhw/pshufd/pshuflw chain; later
; runs expect one byte shuffle.
972 define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
973 ; SSE2-LABEL: shuffle_v8i16_37102735:
975 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
976 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
977 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
978 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
979 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
980 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
983 ; SSSE3-LABEL: shuffle_v8i16_37102735:
985 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
988 ; SSE41-LABEL: shuffle_v8i16_37102735:
990 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
993 ; AVX-LABEL: shuffle_v8i16_37102735:
995 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
997 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
998 ret <8 x i16> %shuffle
; Two-input shuffle interleaving elements 0-3 of %a with elements 0-3 of %b
; (mask 0,8,1,9,2,10,3,11; indices 8-15 address %b).
; Every run expects a single low word unpack.
1001 define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
1002 ; SSE-LABEL: shuffle_v8i16_08192a3b:
1004 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1007 ; AVX-LABEL: shuffle_v8i16_08192a3b:
1009 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1011 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1012 ret <8 x i16> %shuffle
; Two-input shuffle interleaving elements 0-3 of %a with elements 4-7 of %b
; (mask 0,12,1,13,2,14,3,15).
; The expected code first moves the upper half of xmm1 into its lower half with
; pshufd, then performs a low word unpack.
1015 define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
1016 ; SSE-LABEL: shuffle_v8i16_0c1d2e3f:
1018 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1019 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1022 ; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
1024 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1025 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1027 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
1028 ret <8 x i16> %shuffle
; Two-input shuffle interleaving elements 4-7 of %a with elements 4-7 of %b
; (mask 4,12,5,13,6,14,7,15).
; Every run expects a single high word unpack.
1031 define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
1032 ; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
1034 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1037 ; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
1039 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1041 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1042 ret <8 x i16> %shuffle
; Two-input shuffle interleaving elements 4-7 of %a with elements 0-3 of %b
; (mask 4,8,5,9,6,10,7,11).
; The expected code first moves the upper half of xmm0 into its lower half with
; pshufd, then performs a low word unpack.
1045 define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
1046 ; SSE-LABEL: shuffle_v8i16_48596a7b:
1048 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1049 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1052 ; AVX-LABEL: shuffle_v8i16_48596a7b:
1054 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1055 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1057 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
1058 ret <8 x i16> %shuffle
; Two-input shuffle interleaving elements 0,1,6,7 of %a with elements 0,1,6,7
; of %b (mask 0,8,1,9,6,14,7,15).
; The SSE and non-XOP AVX runs expect a pshufd on each input to gather dwords
; 0 and 3 into the low half, followed by a low word unpack. The XOP runs expect
; a single vpperm.
1061 define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
1062 ; SSE-LABEL: shuffle_v8i16_08196e7f:
1064 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
1065 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1066 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1069 ; AVX1-LABEL: shuffle_v8i16_08196e7f:
1071 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
1072 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1073 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1076 ; AVX2OR512VL-LABEL: shuffle_v8i16_08196e7f:
1077 ; AVX2OR512VL: # %bb.0:
1078 ; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
1079 ; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1080 ; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1081 ; AVX2OR512VL-NEXT: retq
1083 ; XOP-LABEL: shuffle_v8i16_08196e7f:
1085 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1],xmm0[2,3],xmm1[2,3],xmm0[12,13],xmm1[12,13],xmm0[14,15],xmm1[14,15]
1087 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
1088 ret <8 x i16> %shuffle
; Two-input shuffle interleaving elements 0,1,6,7 of %a with elements 4,5,0,1
; of %b (mask 0,12,1,13,6,8,7,9).
; The SSE and non-XOP AVX runs expect one pshufd per input to bring the needed
; dwords into the low half, followed by a low word unpack. The XOP runs expect
; a single vpperm.
1091 define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
1092 ; SSE-LABEL: shuffle_v8i16_0c1d6879:
1094 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
1095 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1096 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1099 ; AVX1-LABEL: shuffle_v8i16_0c1d6879:
1101 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
1102 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1103 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1106 ; AVX2OR512VL-LABEL: shuffle_v8i16_0c1d6879:
1107 ; AVX2OR512VL: # %bb.0:
1108 ; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
1109 ; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1110 ; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1111 ; AVX2OR512VL-NEXT: retq
1113 ; XOP-LABEL: shuffle_v8i16_0c1d6879:
1115 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1],xmm1[8,9],xmm0[2,3],xmm1[10,11],xmm0[12,13],xmm1[0,1],xmm0[14,15],xmm1[2,3]
1117 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
1118 ret <8 x i16> %shuffle
; Two-input shuffle with mask 1,0,9,8,3,2,11,10, built from elements 0-3 of
; each input with adjacent pairs swapped.
; Expected lowering differs per run. SSE, AVX1 and the slow-shuffle AVX2 and
; AVX512VL runs expect a low word unpack followed by pshuflw and pshufhw. The
; fast-shuffle AVX2 runs replace the two word shuffles with one vpshufb. The
; fast-shuffle AVX512VL runs expect a vpermt2w with a constant index vector,
; and the XOP runs expect a single vpperm.
1121 define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
1122 ; SSE-LABEL: shuffle_v8i16_109832ba:
1124 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1125 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1126 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1129 ; AVX1-LABEL: shuffle_v8i16_109832ba:
1131 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1132 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1133 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1136 ; AVX2-SLOW-LABEL: shuffle_v8i16_109832ba:
1137 ; AVX2-SLOW: # %bb.0:
1138 ; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1139 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1140 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1141 ; AVX2-SLOW-NEXT: retq
1143 ; AVX2-FAST-LABEL: shuffle_v8i16_109832ba:
1144 ; AVX2-FAST: # %bb.0:
1145 ; AVX2-FAST-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1146 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11]
1147 ; AVX2-FAST-NEXT: retq
1149 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_109832ba:
1150 ; AVX512VL-SLOW: # %bb.0:
1151 ; AVX512VL-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1152 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1153 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1154 ; AVX512VL-SLOW-NEXT: retq
1156 ; AVX512VL-FAST-LABEL: shuffle_v8i16_109832ba:
1157 ; AVX512VL-FAST: # %bb.0:
1158 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [1,0,9,8,3,2,11,10]
1159 ; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1160 ; AVX512VL-FAST-NEXT: retq
1162 ; XOP-LABEL: shuffle_v8i16_109832ba:
1164 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[2,3,0,1],xmm1[2,3,0,1],xmm0[6,7,4,5],xmm1[6,7,4,5]
1166 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
1167 ret <8 x i16> %shuffle
; Two-input shuffle interleaving elements 0-3 of %b with elements 0-3 of %a,
; with %b first (mask 8,0,9,1,10,2,11,3).
; The SSE runs expect the unpack to be formed in xmm1 and then copied to xmm0
; with movdqa; the AVX runs expect one three-operand vpunpcklwd with the
; sources commuted.
1170 define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) {
1171 ; SSE-LABEL: shuffle_v8i16_8091a2b3:
1173 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1174 ; SSE-NEXT: movdqa %xmm1, %xmm0
1177 ; AVX-LABEL: shuffle_v8i16_8091a2b3:
1179 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1181 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
1182 ret <8 x i16> %shuffle
; Two-input shuffle interleaving elements 4-7 of %b with elements 4-7 of %a,
; with %b first (mask 12,4,13,5,14,6,15,7).
; The SSE runs expect the high unpack to be formed in xmm1 and then copied to
; xmm0 with movdqa; the AVX runs expect one three-operand vpunpckhwd with the
; sources commuted.
1184 define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
1185 ; SSE-LABEL: shuffle_v8i16_c4d5e6f7:
1187 ; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1188 ; SSE-NEXT: movdqa %xmm1, %xmm0
1191 ; AVX-LABEL: shuffle_v8i16_c4d5e6f7:
1193 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1195 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
1196 ret <8 x i16> %shuffle
; Two-input shuffle whose low half is elements 0,2,1,3 of %a and whose high
; half is elements 4,6,5,7 of %b (mask 0,2,1,3,12,14,13,15).
; Most runs expect one word shuffle per input followed by a merge of the two
; halves (movsd, pblendw or vpblendd depending on the run). The fast-shuffle
; AVX2 runs use vpshufb on xmm1 plus vpunpcklqdq, the fast-shuffle AVX512VL
; runs expect vpermt2w with a constant index vector, and the XOP runs expect a
; single vpperm.
1199 define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
1200 ; SSE2-LABEL: shuffle_v8i16_0213cedf:
1202 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
1203 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]
1204 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1207 ; SSSE3-LABEL: shuffle_v8i16_0213cedf:
1209 ; SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
1210 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]
1211 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1214 ; SSE41-LABEL: shuffle_v8i16_0213cedf:
1216 ; SSE41-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
1217 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1218 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1221 ; AVX1-LABEL: shuffle_v8i16_0213cedf:
1223 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
1224 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1225 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1228 ; AVX2-SLOW-LABEL: shuffle_v8i16_0213cedf:
1229 ; AVX2-SLOW: # %bb.0:
1230 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
1231 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1232 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1233 ; AVX2-SLOW-NEXT: retq
1235 ; AVX2-FAST-LABEL: shuffle_v8i16_0213cedf:
1236 ; AVX2-FAST: # %bb.0:
1237 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,u,u,u,u,u,u,u,u]
1238 ; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1239 ; AVX2-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1240 ; AVX2-FAST-NEXT: retq
1242 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_0213cedf:
1243 ; AVX512VL-SLOW: # %bb.0:
1244 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
1245 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1246 ; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1247 ; AVX512VL-SLOW-NEXT: retq
1249 ; AVX512VL-FAST-LABEL: shuffle_v8i16_0213cedf:
1250 ; AVX512VL-FAST: # %bb.0:
1251 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,2,1,3,12,14,13,15]
1252 ; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1253 ; AVX512VL-FAST-NEXT: retq
1255 ; XOP-LABEL: shuffle_v8i16_0213cedf:
1257 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,4,5,2,3,6,7],xmm1[8,9,12,13,10,11,14,15]
1259 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
1260 ret <8 x i16> %shuffle
; Two-input shuffle where only lanes 0-3 are defined. They take element 4 of
; %a twice, element 3 of %a, and element 2 of %b (mask 4,4,3,10,u,u,u,u).
; The SSE2 run expects a constant-mask pand/pandn/por blend followed by
; pshufd and pshuflw. The SSSE3 run expects two pshufb results combined with
; por. SSE4.1, AVX1 and the slow-shuffle runs expect pblendw, pshufd, pshuflw.
; The fast-shuffle AVX2 runs expect pblendw plus vpshufb, the fast-shuffle
; AVX512VL runs expect vpermt2w, and the XOP runs expect a single vpperm.
1263 define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
1264 ; SSE2-LABEL: shuffle_v8i16_443aXXXX:
1266 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535]
1267 ; SSE2-NEXT: pand %xmm2, %xmm0
1268 ; SSE2-NEXT: pandn %xmm1, %xmm2
1269 ; SSE2-NEXT: por %xmm0, %xmm2
1270 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3]
1271 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1274 ; SSSE3-LABEL: shuffle_v8i16_443aXXXX:
1276 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u]
1277 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1278 ; SSSE3-NEXT: por %xmm1, %xmm0
1281 ; SSE41-LABEL: shuffle_v8i16_443aXXXX:
1283 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1284 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1285 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1288 ; AVX1-LABEL: shuffle_v8i16_443aXXXX:
1290 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1291 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1292 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1295 ; AVX2-SLOW-LABEL: shuffle_v8i16_443aXXXX:
1296 ; AVX2-SLOW: # %bb.0:
1297 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1298 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1299 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1300 ; AVX2-SLOW-NEXT: retq
1302 ; AVX2-FAST-LABEL: shuffle_v8i16_443aXXXX:
1303 ; AVX2-FAST: # %bb.0:
1304 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1305 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15]
1306 ; AVX2-FAST-NEXT: retq
1308 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_443aXXXX:
1309 ; AVX512VL-SLOW: # %bb.0:
1310 ; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1311 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1312 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1313 ; AVX512VL-SLOW-NEXT: retq
1315 ; AVX512VL-FAST-LABEL: shuffle_v8i16_443aXXXX:
1316 ; AVX512VL-FAST: # %bb.0:
1317 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [4,4,3,10,4,5,6,7]
1318 ; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1319 ; AVX512VL-FAST-NEXT: retq
1321 ; XOP-LABEL: shuffle_v8i16_443aXXXX:
1323 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],xmm1[4,5],xmm0[8,9,10,11,12,13,14,15]
1325 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
1326 ret <8 x i16> %shuffle
; Two-input shuffle where only lanes 0-3 are defined. They take elements 0, 3,
; 2 of %a and element 5 of %b (mask 0,3,2,13,u,u,u,u).
; The SSE2 run expects two shufps followed by pshufhw, pshufd and pshuflw. The
; SSSE3 run expects two pshufb results combined with por. SSE4.1, AVX1 and AVX2
; expect a blend of the two inputs followed by one byte shuffle. The AVX512VL
; runs expect vpermt2w and the XOP runs expect a single vpperm.
1329 define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
1330 ; SSE2-LABEL: shuffle_v8i16_032dXXXX:
1332 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1333 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,2,0]
1334 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
1335 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1336 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1339 ; SSSE3-LABEL: shuffle_v8i16_032dXXXX:
1341 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1342 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1343 ; SSSE3-NEXT: por %xmm1, %xmm0
1346 ; SSE41-LABEL: shuffle_v8i16_032dXXXX:
1348 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1349 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1352 ; AVX1-LABEL: shuffle_v8i16_032dXXXX:
1354 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1355 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1358 ; AVX2-LABEL: shuffle_v8i16_032dXXXX:
1360 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1361 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1364 ; AVX512VL-LABEL: shuffle_v8i16_032dXXXX:
1365 ; AVX512VL: # %bb.0:
1366 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,3,2,13,0,13,0,1]
1367 ; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1368 ; AVX512VL-NEXT: retq
1370 ; XOP-LABEL: shuffle_v8i16_032dXXXX:
1372 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],xmm1[10,11],xmm0[0,1],xmm1[10,11],xmm0[0,1,2,3]
1374 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1375 ret <8 x i16> %shuffle
; Shuffle with exactly one defined lane. Lane 3 takes element 5 of %b (mask
; index 13); every other lane is undef and %a is not referenced.
; Every run expects a single dword shuffle reading xmm1.
1377 define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) {
1378 ; SSE-LABEL: shuffle_v8i16_XXXdXXXX:
1380 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1383 ; AVX-LABEL: shuffle_v8i16_XXXdXXXX:
1385 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[2,2,3,3]
1387 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1388 ret <8 x i16> %shuffle
; Two-input shuffle where only lanes 0-3 are defined. They take elements 0, 1,
; 2 of %a in place and element 5 of %b (mask 0,1,2,13,u,u,u,u).
; The SSE2 run expects a constant-mask pand/pandn/por blend after a pshufd on
; xmm1. The SSSE3 run expects two pshufb results combined with por. The SSE4.1
; and AVX runs expect pshufd on xmm1 followed by pblendw.
1391 define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
1392 ; SSE2-LABEL: shuffle_v8i16_012dXXXX:
1394 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1395 ; SSE2-NEXT: pand %xmm2, %xmm0
1396 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1397 ; SSE2-NEXT: pandn %xmm1, %xmm2
1398 ; SSE2-NEXT: por %xmm2, %xmm0
1401 ; SSSE3-LABEL: shuffle_v8i16_012dXXXX:
1403 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1404 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1405 ; SSSE3-NEXT: por %xmm1, %xmm0
1408 ; SSE41-LABEL: shuffle_v8i16_012dXXXX:
1410 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1411 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1414 ; AVX-LABEL: shuffle_v8i16_012dXXXX:
1416 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1417 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1419 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1420 ret <8 x i16> %shuffle
; Two-input shuffle where only lanes 4-7 are defined. They take elements 4, 5,
; 6 of %b in place and element 3 of %a (mask u,u,u,u,12,13,14,3).
; The SSE2 run expects a constant-mask pand/pandn/por blend after a pshufd on
; xmm0. The SSSE3 run expects two pshufb results combined with por. The SSE4.1
; and AVX1 based runs expect pshufd on xmm0 followed by pblendw, while the
; AVX2 based runs expect vpbroadcastq in place of the pshufd.
1423 define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
1424 ; SSE2-LABEL: shuffle_v8i16_XXXXcde3:
1426 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0]
1427 ; SSE2-NEXT: pand %xmm2, %xmm1
1428 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1429 ; SSE2-NEXT: pandn %xmm0, %xmm2
1430 ; SSE2-NEXT: por %xmm1, %xmm2
1431 ; SSE2-NEXT: movdqa %xmm2, %xmm0
1434 ; SSSE3-LABEL: shuffle_v8i16_XXXXcde3:
1436 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7]
1437 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero
1438 ; SSSE3-NEXT: por %xmm1, %xmm0
1441 ; SSE41-LABEL: shuffle_v8i16_XXXXcde3:
1443 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1444 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1447 ; AVX1-LABEL: shuffle_v8i16_XXXXcde3:
1449 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1450 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1453 ; AVX2OR512VL-LABEL: shuffle_v8i16_XXXXcde3:
1454 ; AVX2OR512VL: # %bb.0:
1455 ; AVX2OR512VL-NEXT: vpbroadcastq %xmm0, %xmm0
1456 ; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1457 ; AVX2OR512VL-NEXT: retq
1459 ; XOPAVX1-LABEL: shuffle_v8i16_XXXXcde3:
1461 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1462 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1463 ; XOPAVX1-NEXT: retq
1465 ; XOPAVX2-LABEL: shuffle_v8i16_XXXXcde3:
1467 ; XOPAVX2-NEXT: vpbroadcastq %xmm0, %xmm0
1468 ; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1469 ; XOPAVX2-NEXT: retq
1470 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
1471 ret <8 x i16> %shuffle
; Two-input shuffle where only lanes 0-3 are defined. They take elements 4, 5,
; 6 of %b and element 3 of %a in place (mask 12,13,14,3,u,u,u,u).
; The SSE2 run expects pshufd on xmm1 plus a constant-mask pand/pandn/por
; blend. The SSSE3 run expects two pshufb results combined with por. The SSE4.1
; and AVX runs expect pshufd on xmm1 followed by pblendw.
1474 define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
1475 ; SSE2-LABEL: shuffle_v8i16_cde3XXXX:
1477 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1478 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1479 ; SSE2-NEXT: pand %xmm2, %xmm1
1480 ; SSE2-NEXT: pandn %xmm0, %xmm2
1481 ; SSE2-NEXT: por %xmm1, %xmm2
1482 ; SSE2-NEXT: movdqa %xmm2, %xmm0
1485 ; SSSE3-LABEL: shuffle_v8i16_cde3XXXX:
1487 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u]
1488 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u]
1489 ; SSSE3-NEXT: por %xmm1, %xmm0
1492 ; SSE41-LABEL: shuffle_v8i16_cde3XXXX:
1494 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1495 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1498 ; AVX-LABEL: shuffle_v8i16_cde3XXXX:
1500 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1501 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1503 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1504 ret <8 x i16> %shuffle
; Fully defined two-input shuffle with mask 0,1,2,13,12,13,14,3. Lanes 0-2 and
; lane 7 come from %a (elements 0,1,2,3); lanes 3-6 come from %b (elements
; 5,4,5,6).
; The SSE2 run expects an eight-instruction shufps/pshuflw/pshufd/pshufhw
; chain. The SSSE3 run expects two pshufb results combined with por. SSE4.1,
; AVX1 and AVX2 expect a blend followed by one byte shuffle. The AVX512VL runs
; expect vpermt2w and the XOP runs expect a single vpperm.
1507 define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
1508 ; SSE2-LABEL: shuffle_v8i16_012dcde3:
1510 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1511 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,2,1]
1512 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
1513 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
1514 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1515 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
1516 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
1517 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7]
1520 ; SSSE3-LABEL: shuffle_v8i16_012dcde3:
1522 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero
1523 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
1524 ; SSSE3-NEXT: por %xmm1, %xmm0
1527 ; SSE41-LABEL: shuffle_v8i16_012dcde3:
1529 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1530 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1533 ; AVX1-LABEL: shuffle_v8i16_012dcde3:
1535 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1536 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1539 ; AVX2-LABEL: shuffle_v8i16_012dcde3:
1541 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1542 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1545 ; AVX512VL-LABEL: shuffle_v8i16_012dcde3:
1546 ; AVX512VL: # %bb.0:
1547 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,13,12,13,14,3]
1548 ; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1549 ; AVX512VL-NEXT: retq
1551 ; XOP-LABEL: shuffle_v8i16_012dcde3:
1553 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[10,11,8,9,10,11,12,13],xmm0[6,7]
1555 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
1556 ret <8 x i16> %shuffle
; Pure blend. Every lane keeps its own position, with lanes 0, 2, 3, 7 taken
; from %a and lanes 1, 4, 5, 6 taken from %b (mask 0,9,2,3,12,13,14,7).
; The SSE2 and SSSE3 runs expect a constant-mask andps/andnps/orps sequence;
; the SSE4.1 and AVX runs expect a single pblendw.
1559 define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) {
1560 ; SSE2-LABEL: shuffle_v8i16_0923cde7:
1562 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1563 ; SSE2-NEXT: andps %xmm2, %xmm0
1564 ; SSE2-NEXT: andnps %xmm1, %xmm2
1565 ; SSE2-NEXT: orps %xmm2, %xmm0
1568 ; SSSE3-LABEL: shuffle_v8i16_0923cde7:
1570 ; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1571 ; SSSE3-NEXT: andps %xmm2, %xmm0
1572 ; SSSE3-NEXT: andnps %xmm1, %xmm2
1573 ; SSSE3-NEXT: orps %xmm2, %xmm0
1576 ; SSE41-LABEL: shuffle_v8i16_0923cde7:
1578 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1581 ; AVX-LABEL: shuffle_v8i16_0923cde7:
1583 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1585 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7>
1586 ret <8 x i16> %shuffle
; Sparse two-input shuffle (mask u,u,u,1,u,5,7,9). Lane 3 takes element 1 of
; %a, lanes 5 and 6 take elements 5 and 7 of %a, and lane 7 takes element 1 of
; %b. Lanes 0, 1, 2 and 4 are undef.
; Most runs expect a splat of the low dword of xmm1 (pshufd or vpbroadcastd),
; word shuffles on xmm0, and a blend of lane 7. The SSSE3 run expects two
; pshufb results combined with por, the fast-shuffle AVX512VL runs expect
; vpermt2w, and the XOP runs expect a single vpperm.
1589 define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
1590 ; SSE2-LABEL: shuffle_v8i16_XXX1X579:
1592 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,0,0]
1593 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0]
1594 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1595 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1596 ; SSE2-NEXT: pand %xmm1, %xmm0
1597 ; SSE2-NEXT: pandn %xmm2, %xmm1
1598 ; SSE2-NEXT: por %xmm0, %xmm1
1599 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1602 ; SSSE3-LABEL: shuffle_v8i16_XXX1X579:
1604 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3]
1605 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero
1606 ; SSSE3-NEXT: por %xmm1, %xmm0
1609 ; SSE41-LABEL: shuffle_v8i16_XXX1X579:
1611 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1612 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1613 ; SSE41-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1614 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1617 ; AVX1-LABEL: shuffle_v8i16_XXX1X579:
1619 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1620 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1621 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1622 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1625 ; AVX2-SLOW-LABEL: shuffle_v8i16_XXX1X579:
1626 ; AVX2-SLOW: # %bb.0:
1627 ; AVX2-SLOW-NEXT: vpbroadcastd %xmm1, %xmm1
1628 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1629 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1630 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1631 ; AVX2-SLOW-NEXT: retq
1633 ; AVX2-FAST-LABEL: shuffle_v8i16_XXX1X579:
1634 ; AVX2-FAST: # %bb.0:
1635 ; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
1636 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,8,9,10,11,14,15,u,u]
1637 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1638 ; AVX2-FAST-NEXT: retq
1640 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_XXX1X579:
1641 ; AVX512VL-SLOW: # %bb.0:
1642 ; AVX512VL-SLOW-NEXT: vpbroadcastd %xmm1, %xmm1
1643 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1644 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1645 ; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1646 ; AVX512VL-SLOW-NEXT: retq
1648 ; AVX512VL-FAST-LABEL: shuffle_v8i16_XXX1X579:
1649 ; AVX512VL-FAST: # %bb.0:
1650 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,4,5,7,9]
1651 ; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1652 ; AVX512VL-FAST-NEXT: retq
1654 ; XOP-LABEL: shuffle_v8i16_XXX1X579:
1656 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,8,9,10,11,14,15],xmm1[2,3]
1658 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
1659 ret <8 x i16> %shuffle
; Sparse two-input shuffle (mask u,u,4,u,8,10,12,u). Lane 2 takes element 4 of
; %a and lanes 4, 5, 6 take elements 0, 2, 4 of %b. Lanes 0, 1, 3 and 7 are
; undef.
; The SSE2 run expects word and dword shuffles on xmm1 followed by shufps. The
; SSSE3 run expects two pshufb results combined with por. SSE4.1, AVX1 and AVX2
; expect pshufb on xmm1, pshufd on xmm0, and a blend. The AVX512VL runs expect
; vpermt2w and the XOP runs expect a single vpperm.
1662 define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
1663 ; SSE2-LABEL: shuffle_v8i16_XX4X8acX:
1665 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
1666 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1667 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
1668 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,2],xmm1[2,3]
1671 ; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
1673 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u]
1674 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u]
1675 ; SSSE3-NEXT: por %xmm1, %xmm0
1678 ; SSE41-LABEL: shuffle_v8i16_XX4X8acX:
1680 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,4,5]
1681 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1682 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1685 ; AVX1-LABEL: shuffle_v8i16_XX4X8acX:
1687 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,4,5]
1688 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1689 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1692 ; AVX2-LABEL: shuffle_v8i16_XX4X8acX:
1694 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,4,5]
1695 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1696 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1699 ; AVX512VL-LABEL: shuffle_v8i16_XX4X8acX:
1700 ; AVX512VL: # %bb.0:
1701 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,4,5,8,10,12,10]
1702 ; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1703 ; AVX512VL-NEXT: retq
1705 ; XOP-LABEL: shuffle_v8i16_XX4X8acX:
1707 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,10,11,8,9,10,11],xmm1[0,1,4,5,8,9,4,5]
1709 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef>
1710 ret <8 x i16> %shuffle
; Scalar %i is inserted into element 0 of an otherwise undef vector, which is
; then shuffled against a zero vector. Lane 0 takes %i (mask index 8) and
; lanes 1-7 take zero elements.
; Every run expects a zero-extending move of %di into %eax followed by a
; movd/vmovd into xmm0, with no explicit vector zeroing.
1713 define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
1714 ; SSE-LABEL: shuffle_v8i16_8zzzzzzz:
1716 ; SSE-NEXT: movzwl %di, %eax
1717 ; SSE-NEXT: movd %eax, %xmm0
1720 ; AVX-LABEL: shuffle_v8i16_8zzzzzzz:
1722 ; AVX-NEXT: movzwl %di, %eax
1723 ; AVX-NEXT: vmovd %eax, %xmm0
1725 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1726 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1727 ret <8 x i16> %shuffle
; Scalar %i is inserted into element 0 of an otherwise undef vector, which is
; then shuffled against a zero vector. Lane 1 takes %i (mask index 8). The
; remaining mask indices (2,3,7,6,5,4,3) are deliberately scrambled but all
; address the zero vector, so those lanes are zero.
; Every run expects a register zeroing followed by a word insert at index 1.
1730 define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
1731 ; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
1733 ; SSE-NEXT: pxor %xmm0, %xmm0
1734 ; SSE-NEXT: pinsrw $1, %edi, %xmm0
1737 ; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
1739 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1740 ; AVX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0
1742 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1743 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
1744 ret <8 x i16> %shuffle
; Scalar %i is inserted into element 0 of an otherwise undef vector, which is
; then shuffled against a zero vector. Lane 5 takes %i (mask index 8); every
; other lane uses mask index 0, i.e. element 0 of the zero vector.
; Every run expects a register zeroing followed by a word insert at index 5.
1747 define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
1748 ; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
1750 ; SSE-NEXT: pxor %xmm0, %xmm0
1751 ; SSE-NEXT: pinsrw $5, %edi, %xmm0
1754 ; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
1756 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1757 ; AVX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0
1759 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1760 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
1761 ret <8 x i16> %shuffle
; Places scalar %i in lane 7; lanes 0, 3 and 6 come from the zero operand and
; lanes 1, 2, 4 and 5 are undef. The checks expect the undef lanes to be
; treated as zero: pxor/vpxor followed by pinsrw $7.
1764 define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
1765 ; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
1767 ; SSE-NEXT: pxor %xmm0, %xmm0
1768 ; SSE-NEXT: pinsrw $7, %edi, %xmm0
1771 ; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
1773 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1774 ; AVX-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0
1776 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1777 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
1778 ret <8 x i16> %shuffle
; Here %i is inserted into lane 3 of %a, and mask index 11 (lane 3 of the
; second operand) moves it into result lane 2; all other lanes are zero.
; The checks expect pxor/vpxor followed by pinsrw $2.
1781 define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
1782 ; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
1784 ; SSE-NEXT: pxor %xmm0, %xmm0
1785 ; SSE-NEXT: pinsrw $2, %edi, %xmm0
1788 ; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
1790 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1791 ; AVX-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0
1793 %a = insertelement <8 x i16> undef, i16 %i, i32 3
1794 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
1795 ret <8 x i16> %shuffle
; Two-input rotate: the result is lanes 5-7 of %b followed by lanes 0-4 of %a.
; SSE2 checks expect a psrldq/pslldq pair merged with por; SSSE3, SSE4.1 and
; AVX checks expect a single palignr/vpalignr.
1798 define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) {
1799 ; SSE2-LABEL: shuffle_v8i16_def01234:
1801 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1802 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1803 ; SSE2-NEXT: por %xmm1, %xmm0
1806 ; SSSE3-LABEL: shuffle_v8i16_def01234:
1808 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1811 ; SSE41-LABEL: shuffle_v8i16_def01234:
1813 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1816 ; AVX-LABEL: shuffle_v8i16_def01234:
1818 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1820 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
1821 ret <8 x i16> %shuffle
; Two-input rotate with undef lanes: the defined lanes (index 14 in lane 1 and
; indices 1,2,3 in lanes 4-6) are consistent with the full rotate mask
; <13,14,15,0,1,2,3,4>. SSE2 checks expect psrldq/pslldq/por; SSSE3, SSE4.1
; and AVX checks expect a single palignr/vpalignr.
1824 define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) {
1825 ; SSE2-LABEL: shuffle_v8i16_ueuu123u:
1827 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1828 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1829 ; SSE2-NEXT: por %xmm1, %xmm0
1832 ; SSSE3-LABEL: shuffle_v8i16_ueuu123u:
1834 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1837 ; SSE41-LABEL: shuffle_v8i16_ueuu123u:
1839 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1842 ; AVX-LABEL: shuffle_v8i16_ueuu123u:
1844 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1846 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1847 ret <8 x i16> %shuffle
; Single-input rotate: every mask index refers to %a (lanes 5-7 then 0-4); %b
; is not referenced. SSE2 checks expect a register copy, a psrldq/pslldq pair
; and por; SSSE3, SSE4.1 and AVX checks expect palignr/vpalignr with xmm0 as
; both sources.
1850 define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) {
1851 ; SSE2-LABEL: shuffle_v8i16_56701234:
1853 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1854 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1855 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1856 ; SSE2-NEXT: por %xmm1, %xmm0
1859 ; SSSE3-LABEL: shuffle_v8i16_56701234:
1861 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1864 ; SSE41-LABEL: shuffle_v8i16_56701234:
1866 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1869 ; AVX-LABEL: shuffle_v8i16_56701234:
1871 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1873 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
1874 ret <8 x i16> %shuffle
; Single-input rotate with undef lanes: the defined lanes (index 6 in lane 1
; and indices 1,2,3 in lanes 4-6) are consistent with the rotate mask
; <5,6,7,0,1,2,3,4> of %a alone. SSE2 checks expect movdqa + psrldq/pslldq +
; por; SSSE3, SSE4.1 and AVX checks expect a single palignr/vpalignr.
1877 define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) {
1878 ; SSE2-LABEL: shuffle_v8i16_u6uu123u:
1880 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1881 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1882 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1883 ; SSE2-NEXT: por %xmm1, %xmm0
1886 ; SSSE3-LABEL: shuffle_v8i16_u6uu123u:
1888 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1891 ; SSE41-LABEL: shuffle_v8i16_u6uu123u:
1893 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1896 ; AVX-LABEL: shuffle_v8i16_u6uu123u:
1898 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1900 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1901 ret <8 x i16> %shuffle
; Only lanes 4-6 are defined (lanes 1-3 of %a); the rest are undef. With no
; wrapped-around lanes required, the checks expect a single 6-byte left byte
; shift (pslldq/vpslldq) instead of a rotate.
1904 define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) {
1905 ; SSE-LABEL: shuffle_v8i16_uuuu123u:
1907 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1910 ; AVX-LABEL: shuffle_v8i16_uuuu123u:
1912 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1914 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1915 ret <8 x i16> %shuffle
; Two-input rotate: the result is lanes 3-7 of %b followed by lanes 0-2 of %a.
; SSE2 checks expect a psrldq/pslldq pair merged with por; SSSE3, SSE4.1 and
; AVX checks expect a single palignr/vpalignr.
1918 define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) {
1919 ; SSE2-LABEL: shuffle_v8i16_bcdef012:
1921 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1922 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1923 ; SSE2-NEXT: por %xmm1, %xmm0
1926 ; SSSE3-LABEL: shuffle_v8i16_bcdef012:
1928 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1931 ; SSE41-LABEL: shuffle_v8i16_bcdef012:
1933 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1936 ; AVX-LABEL: shuffle_v8i16_bcdef012:
1938 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1940 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2>
1941 ret <8 x i16> %shuffle
; Two-input rotate with undef lanes: the defined lanes (indices 12,13,14 in
; lanes 1-3 and index 1 in lane 6) are consistent with the full rotate mask
; <11,12,13,14,15,0,1,2>. SSE2 checks expect psrldq/pslldq/por; SSSE3, SSE4.1
; and AVX checks expect a single palignr/vpalignr.
1944 define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) {
1945 ; SSE2-LABEL: shuffle_v8i16_ucdeuu1u:
1947 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1948 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1949 ; SSE2-NEXT: por %xmm1, %xmm0
1952 ; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u:
1954 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1957 ; SSE41-LABEL: shuffle_v8i16_ucdeuu1u:
1959 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1962 ; AVX-LABEL: shuffle_v8i16_ucdeuu1u:
1964 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1966 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef>
1967 ret <8 x i16> %shuffle
; Single-input rotate: every mask index refers to %a (lanes 3-7 then 0-2); %b
; is not referenced. SSE2 checks expect a register copy, a psrldq/pslldq pair
; and por; SSSE3, SSE4.1 and AVX checks expect palignr/vpalignr with xmm0 as
; both sources.
1970 define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) {
1971 ; SSE2-LABEL: shuffle_v8i16_34567012:
1973 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1974 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1975 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1976 ; SSE2-NEXT: por %xmm1, %xmm0
1979 ; SSSE3-LABEL: shuffle_v8i16_34567012:
1981 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1984 ; SSE41-LABEL: shuffle_v8i16_34567012:
1986 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1989 ; AVX-LABEL: shuffle_v8i16_34567012:
1991 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1993 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>
1994 ret <8 x i16> %shuffle
; Single-input rotate with undef lanes: the defined lanes (indices 4,5,6 in
; lanes 1-3 and index 1 in lane 6) are consistent with the rotate mask
; <3,4,5,6,7,0,1,2> of %a alone. SSE2 checks expect movdqa + psrldq/pslldq +
; por; SSSE3, SSE4.1 and AVX checks expect a single palignr/vpalignr.
1997 define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) {
1998 ; SSE2-LABEL: shuffle_v8i16_u456uu1u:
2000 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2001 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2002 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2003 ; SSE2-NEXT: por %xmm1, %xmm0
2006 ; SSSE3-LABEL: shuffle_v8i16_u456uu1u:
2008 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
2011 ; SSE41-LABEL: shuffle_v8i16_u456uu1u:
2013 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
2016 ; AVX-LABEL: shuffle_v8i16_u456uu1u:
2018 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
2020 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef>
2021 ret <8 x i16> %shuffle
; Only lanes 1-3 are defined (lanes 4-6 of %a); the rest are undef. With no
; wrapped-around lanes required, the checks expect a single 6-byte right byte
; shift (psrldq/vpsrldq) instead of a rotate.
2024 define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) {
2025 ; SSE-LABEL: shuffle_v8i16_u456uuuu:
2027 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2030 ; AVX-LABEL: shuffle_v8i16_u456uuuu:
2032 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2034 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
2035 ret <8 x i16> %shuffle
; Two-input rotate: the result is lanes 3-7 of %a followed by lanes 0-2 of %b.
; SSE2 checks expect psrldq/pslldq/por. SSSE3 and SSE4.1 checks expect palignr
; producing the result in xmm1 followed by a movdqa back into xmm0; the AVX
; checks expect a single vpalignr writing xmm0 directly.
2038 define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) {
2039 ; SSE2-LABEL: shuffle_v8i16_3456789a:
2041 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2042 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
2043 ; SSE2-NEXT: por %xmm1, %xmm0
2046 ; SSSE3-LABEL: shuffle_v8i16_3456789a:
2048 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
2049 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2052 ; SSE41-LABEL: shuffle_v8i16_3456789a:
2054 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
2055 ; SSE41-NEXT: movdqa %xmm1, %xmm0
2058 ; AVX-LABEL: shuffle_v8i16_3456789a:
2060 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
2062 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
2063 ret <8 x i16> %shuffle
; Two-input rotate with undef lanes: the defined lanes (indices 4,5,6 in
; lanes 1-3 and index 9 in lane 6) are consistent with the full rotate mask
; <3,4,5,6,7,8,9,10>. SSE2 checks expect psrldq/pslldq/por; SSSE3 and SSE4.1
; checks expect palignr into xmm1 plus a movdqa; AVX checks expect vpalignr.
2066 define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) {
2067 ; SSE2-LABEL: shuffle_v8i16_u456uu9u:
2069 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2070 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
2071 ; SSE2-NEXT: por %xmm1, %xmm0
2074 ; SSSE3-LABEL: shuffle_v8i16_u456uu9u:
2076 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
2077 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2080 ; SSE41-LABEL: shuffle_v8i16_u456uu9u:
2082 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
2083 ; SSE41-NEXT: movdqa %xmm1, %xmm0
2086 ; AVX-LABEL: shuffle_v8i16_u456uu9u:
2088 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
2090 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef>
2091 ret <8 x i16> %shuffle
; Two-input rotate: the result is lanes 5-7 of %a followed by lanes 0-4 of %b.
; SSE2 checks expect psrldq/pslldq/por. SSSE3 and SSE4.1 checks expect palignr
; producing the result in xmm1 followed by a movdqa back into xmm0; the AVX
; checks expect a single vpalignr writing xmm0 directly.
2094 define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) {
2095 ; SSE2-LABEL: shuffle_v8i16_56789abc:
2097 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2098 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
2099 ; SSE2-NEXT: por %xmm1, %xmm0
2102 ; SSSE3-LABEL: shuffle_v8i16_56789abc:
2104 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2105 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2108 ; SSE41-LABEL: shuffle_v8i16_56789abc:
2110 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2111 ; SSE41-NEXT: movdqa %xmm1, %xmm0
2114 ; AVX-LABEL: shuffle_v8i16_56789abc:
2116 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2118 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
2119 ret <8 x i16> %shuffle
; Two-input rotate with undef lanes: the defined lanes (index 6 in lane 1 and
; indices 9,10,11 in lanes 4-6) are consistent with the full rotate mask
; <5,6,7,8,9,10,11,12>. SSE2 checks expect psrldq/pslldq/por; SSSE3 and SSE4.1
; checks expect palignr into xmm1 plus a movdqa; AVX checks expect vpalignr.
2122 define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) {
2123 ; SSE2-LABEL: shuffle_v8i16_u6uu9abu:
2125 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2126 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
2127 ; SSE2-NEXT: por %xmm1, %xmm0
2130 ; SSSE3-LABEL: shuffle_v8i16_u6uu9abu:
2132 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2133 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2136 ; SSE41-LABEL: shuffle_v8i16_u6uu9abu:
2138 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2139 ; SSE41-NEXT: movdqa %xmm1, %xmm0
2142 ; AVX-LABEL: shuffle_v8i16_u6uu9abu:
2144 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2146 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
2147 ret <8 x i16> %shuffle
; Places lanes 0 and 1 of %a into result lanes 0 and 4; all other lanes are
; undef. SSE2 and SSSE3 checks expect pshufd + pshufhw; SSE4.1 and AVX checks
; expect a single pmovzxwq/vpmovzxwq (the undef lanes are filled with zero).
2150 define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) {
2151 ; SSE2-LABEL: shuffle_v8i16_0uuu1uuu:
2153 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2154 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2157 ; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu:
2159 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2160 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2163 ; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
2165 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2168 ; AVX-LABEL: shuffle_v8i16_0uuu1uuu:
2170 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2172 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
2173 ret <8 x i16> %shuffle
; Places lanes 0 and 1 of %a into result lanes 0 and 4 with the remaining
; lanes taken from the zero operand (indices 9-15), i.e. the pattern of a
; 16-bit to 64-bit zero extension. SSE2 and SSSE3 checks expect two unpacks
; against a zeroed register; SSE4.1 and AVX checks expect pmovzxwq/vpmovzxwq.
2176 define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) {
2177 ; SSE2-LABEL: shuffle_v8i16_0zzz1zzz:
2179 ; SSE2-NEXT: pxor %xmm1, %xmm1
2180 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2181 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2184 ; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz:
2186 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2187 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2188 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2191 ; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
2193 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2196 ; AVX-LABEL: shuffle_v8i16_0zzz1zzz:
2198 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2200 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
2201 ret <8 x i16> %shuffle
; Places lanes 0-3 of %a into the even result lanes; the odd lanes are undef.
; SSE2 and SSSE3 checks expect punpcklwd of xmm0 with itself; SSE4.1 and AVX
; checks expect pmovzxwd/vpmovzxwd (the undef lanes are filled with zero).
2204 define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) {
2205 ; SSE2-LABEL: shuffle_v8i16_0u1u2u3u:
2207 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2210 ; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u:
2212 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2215 ; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
2217 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2220 ; AVX-LABEL: shuffle_v8i16_0u1u2u3u:
2222 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2224 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
2225 ret <8 x i16> %shuffle
; Interleaves lanes 0-3 of %a with lanes of the zero operand, i.e. the pattern
; of a 16-bit to 32-bit zero extension. SSE2 and SSSE3 checks expect punpcklwd
; against a zeroed register; SSE4.1 and AVX checks expect pmovzxwd/vpmovzxwd.
2228 define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
2229 ; SSE2-LABEL: shuffle_v8i16_0z1z2z3z:
2231 ; SSE2-NEXT: pxor %xmm1, %xmm1
2232 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2235 ; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z:
2237 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2238 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2241 ; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
2243 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2246 ; AVX-LABEL: shuffle_v8i16_0z1z2z3z:
2248 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2250 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
2251 ret <8 x i16> %shuffle
; Single-input shuffle repeating the 4-lane pattern 0,1,1,0 in both halves;
; every index is below 8, so the zeroinitializer operand is never selected.
; Expected lowering per prefix:
;   SSE, AVX1, XOPAVX1:                    pshuflw + pshufd
;   AVX2-SLOW, AVX512VL-SLOW, XOPAVX2:     vpshuflw + vpbroadcastq
;   AVX2-FAST, AVX512VL-FAST:              a single vpshufb
2254 define <8 x i16> @shuffle_v8i16_01100110(<8 x i16> %a) {
2255 ; SSE-LABEL: shuffle_v8i16_01100110:
2257 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2258 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2261 ; AVX1-LABEL: shuffle_v8i16_01100110:
2263 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2264 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2267 ; AVX2-SLOW-LABEL: shuffle_v8i16_01100110:
2268 ; AVX2-SLOW: # %bb.0:
2269 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2270 ; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
2271 ; AVX2-SLOW-NEXT: retq
2273 ; AVX2-FAST-LABEL: shuffle_v8i16_01100110:
2274 ; AVX2-FAST: # %bb.0:
2275 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2276 ; AVX2-FAST-NEXT: retq
2278 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_01100110:
2279 ; AVX512VL-SLOW: # %bb.0:
2280 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2281 ; AVX512VL-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
2282 ; AVX512VL-SLOW-NEXT: retq
2284 ; AVX512VL-FAST-LABEL: shuffle_v8i16_01100110:
2285 ; AVX512VL-FAST: # %bb.0:
2286 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2287 ; AVX512VL-FAST-NEXT: retq
2289 ; XOPAVX1-LABEL: shuffle_v8i16_01100110:
2291 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2292 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2293 ; XOPAVX1-NEXT: retq
2295 ; XOPAVX2-LABEL: shuffle_v8i16_01100110:
2297 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2298 ; XOPAVX2-NEXT: vpbroadcastq %xmm0, %xmm0
2299 ; XOPAVX2-NEXT: retq
2300 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 1, i32 0>
2301 ret <8 x i16> %shuffle
; Single-input shuffle whose defined lanes match the repeated pattern
; 0,1,1,0 / 0,1,1,0, with lanes 2 and 4 left undef. The checks expect the
; undef lanes to be filled so that the lowering is the same as for the fully
; defined pattern:
;   SSE, AVX1, XOPAVX1:                    pshuflw + pshufd
;   AVX2-SLOW, AVX512VL-SLOW, XOPAVX2:     vpshuflw + vpbroadcastq
;   AVX2-FAST, AVX512VL-FAST:              a single vpshufb
2304 define <8 x i16> @shuffle_v8i16_01u0u110(<8 x i16> %a) {
2305 ; SSE-LABEL: shuffle_v8i16_01u0u110:
2307 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2308 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2311 ; AVX1-LABEL: shuffle_v8i16_01u0u110:
2313 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2314 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2317 ; AVX2-SLOW-LABEL: shuffle_v8i16_01u0u110:
2318 ; AVX2-SLOW: # %bb.0:
2319 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2320 ; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
2321 ; AVX2-SLOW-NEXT: retq
2323 ; AVX2-FAST-LABEL: shuffle_v8i16_01u0u110:
2324 ; AVX2-FAST: # %bb.0:
2325 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2326 ; AVX2-FAST-NEXT: retq
2328 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_01u0u110:
2329 ; AVX512VL-SLOW: # %bb.0:
2330 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2331 ; AVX512VL-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
2332 ; AVX512VL-SLOW-NEXT: retq
2334 ; AVX512VL-FAST-LABEL: shuffle_v8i16_01u0u110:
2335 ; AVX512VL-FAST: # %bb.0:
2336 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2337 ; AVX512VL-FAST-NEXT: retq
2339 ; XOPAVX1-LABEL: shuffle_v8i16_01u0u110:
2341 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2342 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2343 ; XOPAVX1-NEXT: retq
2345 ; XOPAVX2-LABEL: shuffle_v8i16_01u0u110:
2347 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2348 ; XOPAVX2-NEXT: vpbroadcastq %xmm0, %xmm0
2349 ; XOPAVX2-NEXT: retq
2350 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 0, i32 undef, i32 1, i32 1, i32 0>
2351 ret <8 x i16> %shuffle
; Single-input shuffle drawing only on the high half of %a; with the undef
; lanes 3 and 4 filled in, both result halves hold lanes 4,6,7,5.
; Expected lowering per prefix:
;   SSE, AVX1, AVX2-SLOW, AVX512VL-SLOW, XOP:  pshufhw + pshufd
;   AVX2-FAST, AVX512VL-FAST:                  a single vpshufb
2354 define <8 x i16> @shuffle_v8i16_467uu675(<8 x i16> %a) {
2355 ; SSE-LABEL: shuffle_v8i16_467uu675:
2357 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2358 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2361 ; AVX1-LABEL: shuffle_v8i16_467uu675:
2363 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2364 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2367 ; AVX2-SLOW-LABEL: shuffle_v8i16_467uu675:
2368 ; AVX2-SLOW: # %bb.0:
2369 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2370 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2371 ; AVX2-SLOW-NEXT: retq
2373 ; AVX2-FAST-LABEL: shuffle_v8i16_467uu675:
2374 ; AVX2-FAST: # %bb.0:
2375 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11]
2376 ; AVX2-FAST-NEXT: retq
2378 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_467uu675:
2379 ; AVX512VL-SLOW: # %bb.0:
2380 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2381 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2382 ; AVX512VL-SLOW-NEXT: retq
2384 ; AVX512VL-FAST-LABEL: shuffle_v8i16_467uu675:
2385 ; AVX512VL-FAST: # %bb.0:
2386 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11]
2387 ; AVX512VL-FAST-NEXT: retq
2389 ; XOP-LABEL: shuffle_v8i16_467uu675:
2391 ; XOP-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2392 ; XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2394 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7, i32 5>
2395 ret <8 x i16> %shuffle
; Swaps each adjacent pair of 16-bit lanes of %a, which is a rotate by 16 bits
; of every 32-bit element.
; Expected lowering per prefix:
;   SSE, AVX1, AVX2-SLOW:  pshuflw + pshufhw
;   AVX2-FAST:             a single vpshufb
;   AVX512VL:              vprold $16
;   XOP:                   vprotd $16
2398 define <8 x i16> @shuffle_v8i16_10325476(<8 x i16> %a) {
2399 ; SSE-LABEL: shuffle_v8i16_10325476:
2401 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
2402 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
2405 ; AVX1-LABEL: shuffle_v8i16_10325476:
2407 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
2408 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
2411 ; AVX2-SLOW-LABEL: shuffle_v8i16_10325476:
2412 ; AVX2-SLOW: # %bb.0:
2413 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
2414 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
2415 ; AVX2-SLOW-NEXT: retq
2417 ; AVX2-FAST-LABEL: shuffle_v8i16_10325476:
2418 ; AVX2-FAST: # %bb.0:
2419 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
2420 ; AVX2-FAST-NEXT: retq
2422 ; AVX512VL-LABEL: shuffle_v8i16_10325476:
2423 ; AVX512VL: # %bb.0:
2424 ; AVX512VL-NEXT: vprold $16, %xmm0, %xmm0
2425 ; AVX512VL-NEXT: retq
2427 ; XOP-LABEL: shuffle_v8i16_10325476:
2429 ; XOP-NEXT: vprotd $16, %xmm0, %xmm0
2431 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
2432 ret <8 x i16> %shuffle
; Rotates the four 16-bit lanes inside each 64-bit half of %a by one lane
; (1,2,3,0 and 5,6,7,4), which is a rotate of every 64-bit element.
; Expected lowering per prefix:
;   SSE, AVX1, AVX2-SLOW:  pshuflw + pshufhw
;   AVX2-FAST:             a single vpshufb
;   AVX512VL:              vprolq $48
;   XOP:                   vprotq $48
2435 define <8 x i16> @shuffle_v8i16_12305674(<8 x i16> %a) {
2436 ; SSE-LABEL: shuffle_v8i16_12305674:
2438 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
2439 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
2442 ; AVX1-LABEL: shuffle_v8i16_12305674:
2444 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
2445 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
2448 ; AVX2-SLOW-LABEL: shuffle_v8i16_12305674:
2449 ; AVX2-SLOW: # %bb.0:
2450 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
2451 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
2452 ; AVX2-SLOW-NEXT: retq
2454 ; AVX2-FAST-LABEL: shuffle_v8i16_12305674:
2455 ; AVX2-FAST: # %bb.0:
2456 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9]
2457 ; AVX2-FAST-NEXT: retq
2459 ; AVX512VL-LABEL: shuffle_v8i16_12305674:
2460 ; AVX512VL: # %bb.0:
2461 ; AVX512VL-NEXT: vprolq $48, %xmm0, %xmm0
2462 ; AVX512VL-NEXT: retq
2464 ; XOP-LABEL: shuffle_v8i16_12305674:
2466 ; XOP-NEXT: vprotq $48, %xmm0, %xmm0
2468 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
2469 ret <8 x i16> %shuffle
; Selects the even lanes of %a (0,2,4,6) and repeats them in both halves of
; the result. SSE2 checks expect a five-instruction pshuflw/pshufhw/pshufd
; sequence; SSSE3, SSE4.1 and AVX checks expect a single pshufb/vpshufb.
2472 define <8 x i16> @shuffle_v8i16_02460246(<8 x i16> %a) {
2473 ; SSE2-LABEL: shuffle_v8i16_02460246:
2475 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
2476 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
2477 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
2478 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7]
2479 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,4,5]
2482 ; SSSE3-LABEL: shuffle_v8i16_02460246:
2484 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13]
2487 ; SSE41-LABEL: shuffle_v8i16_02460246:
2489 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13]
2492 ; AVX-LABEL: shuffle_v8i16_02460246:
2494 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13]
2496 %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 0, i32 2, i32 4, i32 6>
2497 ret <8 x i16> %shuffle
; Repeats lanes 0 and 4 of %a four times across the result. SSE2 checks expect
; pshufd + pshuflw + pshufhw; SSSE3, SSE4.1 and AVX checks expect a single
; pshufb/vpshufb.
2500 define <8 x i16> @shuffle_v8i16_04040404(<8 x i16> %a) {
2501 ; SSE2-LABEL: shuffle_v8i16_04040404:
2503 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
2504 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
2505 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,6,4]
2508 ; SSSE3-LABEL: shuffle_v8i16_04040404:
2510 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,0,1,8,9,0,1,8,9,0,1,8,9]
2513 ; SSE41-LABEL: shuffle_v8i16_04040404:
2515 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,0,1,8,9,0,1,8,9,0,1,8,9]
2518 ; AVX-LABEL: shuffle_v8i16_04040404:
2520 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,0,1,8,9,0,1,8,9,0,1,8,9]
2522 %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 4, i32 0, i32 4, i32 0, i32 4, i32 0, i32 4>
2523 ret <8 x i16> %shuffle
; Two-input shuffle taking the even lanes of %a followed by the even lanes of
; %b (the low 16 bits of every 32-bit element of both inputs).
; Expected lowering per prefix:
;   SSE2:               pslld/psrad $16 on both inputs, then packssdw
;   SSSE3:              pshufb on both inputs with a shared mask, then punpcklqdq
;   SSE41, AVX1, AVX2:  blend odd lanes with zero (pblendw), then packusdw
;   AVX512VL:           vinserti128 to form a ymm, then vpmovdw
;   XOP:                a single vpperm
2526 define <8 x i16> @shuffle_v8i16_02468ACE(<8 x i16> %a, <8 x i16> %b) {
2527 ; SSE2-LABEL: shuffle_v8i16_02468ACE:
2529 ; SSE2-NEXT: pslld $16, %xmm1
2530 ; SSE2-NEXT: psrad $16, %xmm1
2531 ; SSE2-NEXT: pslld $16, %xmm0
2532 ; SSE2-NEXT: psrad $16, %xmm0
2533 ; SSE2-NEXT: packssdw %xmm1, %xmm0
2536 ; SSSE3-LABEL: shuffle_v8i16_02468ACE:
2538 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
2539 ; SSSE3-NEXT: pshufb %xmm2, %xmm1
2540 ; SSSE3-NEXT: pshufb %xmm2, %xmm0
2541 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2544 ; SSE41-LABEL: shuffle_v8i16_02468ACE:
2546 ; SSE41-NEXT: pxor %xmm2, %xmm2
2547 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
2548 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
2549 ; SSE41-NEXT: packusdw %xmm1, %xmm0
2552 ; AVX1-LABEL: shuffle_v8i16_02468ACE:
2554 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
2555 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
2556 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
2557 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2560 ; AVX2-LABEL: shuffle_v8i16_02468ACE:
2562 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
2563 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
2564 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
2565 ; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2568 ; AVX512VL-LABEL: shuffle_v8i16_02468ACE:
2569 ; AVX512VL: # %bb.0:
2570 ; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
2571 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2572 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
2573 ; AVX512VL-NEXT: vzeroupper
2574 ; AVX512VL-NEXT: retq
2576 ; XOP-LABEL: shuffle_v8i16_02468ACE:
2578 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],xmm1[0,1,4,5,8,9,12,13]
2580 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
2581 ret <8 x i16> %shuffle
; Two-input shuffle taking lanes 0 and 4 of %a and lanes 0 and 4 of %b
; (indices 8 and 12), with that 4-lane group repeated in both result halves.
; Expected lowering per prefix:
;   SSE2, SSSE3:        pshufd + pshuflw on each input, then punpckldq
;   SSE41, AVX1, AVX2:  blend with zero (pblendw), then two packusdw
;   AVX512VL:           vpermt2w with a broadcast index vector
;   XOP:                a single vpperm
2584 define <8 x i16> @shuffle_v8i16_048C048C(<8 x i16> %a, <8 x i16> %b) {
2585 ; SSE2-LABEL: shuffle_v8i16_048C048C:
2587 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2588 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,0,2,4,5,6,7]
2589 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2590 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
2591 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2594 ; SSSE3-LABEL: shuffle_v8i16_048C048C:
2596 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2597 ; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,0,2,4,5,6,7]
2598 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2599 ; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
2600 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2603 ; SSE41-LABEL: shuffle_v8i16_048C048C:
2605 ; SSE41-NEXT: pxor %xmm2, %xmm2
2606 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
2607 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
2608 ; SSE41-NEXT: packusdw %xmm1, %xmm0
2609 ; SSE41-NEXT: packusdw %xmm0, %xmm0
2612 ; AVX1-LABEL: shuffle_v8i16_048C048C:
2614 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
2615 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
2616 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
2617 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2618 ; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2621 ; AVX2-LABEL: shuffle_v8i16_048C048C:
2623 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
2624 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
2625 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
2626 ; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2627 ; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2630 ; AVX512VL-LABEL: shuffle_v8i16_048C048C:
2631 ; AVX512VL: # %bb.0:
2632 ; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [0,4,8,12,0,4,8,12]
2633 ; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
2634 ; AVX512VL-NEXT: retq
2636 ; XOP-LABEL: shuffle_v8i16_048C048C:
2638 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,8,9],xmm1[0,1,8,9],xmm0[0,1,8,9],xmm1[0,1,8,9]
2640 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 0, i32 4, i32 8, i32 12>
2641 ret <8 x i16> %shuffle
2645 ; Shuffle to logical bit shifts
; Moves each even lane of %a up into the following odd lane and fills the even
; lanes from the zero operand (index 8). The checks expect this to be
; recognised as a 16-bit left shift of every 32-bit element (pslld $16).
2647 define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) {
2648 ; SSE-LABEL: shuffle_v8i16_z0z2z4z6:
2650 ; SSE-NEXT: pslld $16, %xmm0
2653 ; AVX-LABEL: shuffle_v8i16_z0z2z4z6:
2655 ; AVX-NEXT: vpslld $16, %xmm0, %xmm0
2657 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
2658 ret <8 x i16> %shuffle
; Moves lanes 0 and 4 of %a to the top lane of their 64-bit half (lanes 3 and
; 7) and zeroes everything else. The checks expect a 48-bit left shift of
; every 64-bit element (psllq $48).
2661 define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) {
2662 ; SSE-LABEL: shuffle_v8i16_zzz0zzz4:
2664 ; SSE-NEXT: psllq $48, %xmm0
2667 ; AVX-LABEL: shuffle_v8i16_zzz0zzz4:
2669 ; AVX-NEXT: vpsllq $48, %xmm0, %xmm0
2671 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
2672 ret <8 x i16> %shuffle
; Same left-shift pattern with undef lanes mixed in: lanes 0, 1 and 4 are zero,
; lanes 2, 3 and 6 take %a elements 0, 1 and 4, lanes 5 and 7 are undef.
; The expectations below pin this to a 64-bit-lane left shift by 32.
2675 define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) {
2676 ; SSE-LABEL: shuffle_v8i16_zz01zX4X:
2678 ; SSE-NEXT: psllq $32, %xmm0
2681 ; AVX-LABEL: shuffle_v8i16_zz01zX4X:
2683 ; AVX-NEXT: vpsllq $32, %xmm0, %xmm0
2685 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef>
2686 ret <8 x i16> %shuffle
; Lanes 0 and 4 are zero, lane 2 is undef, and the remaining lanes take the %a
; element one position lower. The expectations below pin this to a
; 64-bit-lane left shift by 16.
2689 define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) {
2690 ; SSE-LABEL: shuffle_v8i16_z0X2z456:
2692 ; SSE-NEXT: psllq $16, %xmm0
2695 ; AVX-LABEL: shuffle_v8i16_z0X2z456:
2697 ; AVX-NEXT: vpsllq $16, %xmm0, %xmm0
2699 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6>
2700 ret <8 x i16> %shuffle
; Even lanes take %a elements 1, 3, (undef), 7 and odd lanes are zero (index 8).
; The expectations below pin this to a 32-bit-lane logical right shift by 16.
2703 define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) {
2704 ; SSE-LABEL: shuffle_v8i16_1z3zXz7z:
2706 ; SSE-NEXT: psrld $16, %xmm0
2709 ; AVX-LABEL: shuffle_v8i16_1z3zXz7z:
2711 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
2713 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8>
2714 ret <8 x i16> %shuffle
; Lanes 3 and 7 are zero, lane 1 is undef, and the remaining lanes take the %a
; element one position higher. The expectations below pin this to a
; 64-bit-lane logical right shift by 16.
2717 define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) {
2718 ; SSE-LABEL: shuffle_v8i16_1X3z567z:
2720 ; SSE-NEXT: psrlq $16, %xmm0
2723 ; AVX-LABEL: shuffle_v8i16_1X3z567z:
2725 ; AVX-NEXT: vpsrlq $16, %xmm0, %xmm0
2727 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
2728 ret <8 x i16> %shuffle
; Each 64-bit half keeps its upper two i16 elements in its lower two lanes and
; zeroes the rest. The expectations below pin this to a 64-bit-lane logical
; right shift by 32.
2731 define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) {
2732 ; SSE-LABEL: shuffle_v8i16_23zz67zz:
2734 ; SSE-NEXT: psrlq $32, %xmm0
2737 ; AVX-LABEL: shuffle_v8i16_23zz67zz:
2739 ; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
2741 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8>
2742 ret <8 x i16> %shuffle
; Lane 0 takes %a element 3, lanes 1 and 5-7 are zero, and lanes 2-4 are undef.
; The undef lanes leave room for the 64-bit-lane logical right shift by 48 that
; the expectations below require.
2745 define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) {
2746 ; SSE-LABEL: shuffle_v8i16_3zXXXzzz:
2748 ; SSE-NEXT: psrlq $48, %xmm0
2751 ; AVX-LABEL: shuffle_v8i16_3zXXXzzz:
2753 ; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
2755 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8>
2756 ret <8 x i16> %shuffle
; Lanes 0-3 keep %a elements 0-3 (lane 2 undef) and lanes 4-7 are zero or undef.
; The expectations below pin this to a move of the low 64 bits that zeroes the
; upper half.
2759 define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
2760 ; SSE-LABEL: shuffle_v8i16_01u3zzuz:
2762 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2765 ; AVX-LABEL: shuffle_v8i16_01u3zzuz:
2767 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2769 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8>
2770 ret <8 x i16> %shuffle
; Identity shuffle of %a except lane 1, which is zero (index 9 selects element 1
; of the zeroinitializer operand). The SSE2 and SSSE3 runs expect an AND with a
; constant-pool mask; the SSE4.1 and AVX runs expect a zeroed register blended
; in with pblendw.
2773 define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) {
2774 ; SSE2-LABEL: shuffle_v8i16_0z234567:
2776 ; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2779 ; SSSE3-LABEL: shuffle_v8i16_0z234567:
2781 ; SSSE3-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2784 ; SSE41-LABEL: shuffle_v8i16_0z234567:
2786 ; SSE41-NEXT: pxor %xmm1, %xmm1
2787 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2790 ; AVX-LABEL: shuffle_v8i16_0z234567:
2792 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2793 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2795 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2796 ret <8 x i16> %shuffle
; Keeps %a elements 0, 5 and 7 in place and zeroes lanes 1-4 and 6 (index 8).
; The SSE2 and SSSE3 runs expect an AND with a constant-pool mask; the SSE4.1
; and AVX runs expect a zeroed register blended in with pblendw.
2799 define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) {
2800 ; SSE2-LABEL: shuffle_v8i16_0zzzz5z7:
2802 ; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2805 ; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7:
2807 ; SSSE3-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2810 ; SSE41-LABEL: shuffle_v8i16_0zzzz5z7:
2812 ; SSE41-NEXT: pxor %xmm1, %xmm1
2813 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2816 ; AVX-LABEL: shuffle_v8i16_0zzzz5z7:
2818 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2819 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2821 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7>
2822 ret <8 x i16> %shuffle
; Identity shuffle of %a except lane 7, which is zero (index 15 selects element 7
; of the zeroinitializer operand), as the "0123456z" in the name states. Lane 1
; previously used index 9, which zeroed it and duplicated the 0z... pattern.
; The SSE2 and SSSE3 runs expect an AND with a constant-pool mask; the SSE4.1
; and AVX runs expect a zeroed register blended into lane 7 with pblendw.
; The pblendw expectations were adjusted by hand for the corrected mask; re-run
; utils/update_llc_test_checks.py to confirm them.
2825 define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
2826 ; SSE2-LABEL: shuffle_v8i16_0123456z:
2828 ; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2831 ; SSSE3-LABEL: shuffle_v8i16_0123456z:
2833 ; SSSE3-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2836 ; SSE41-LABEL: shuffle_v8i16_0123456z:
2838 ; SSE41-NEXT: pxor %xmm1, %xmm1
2839 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2842 ; AVX-LABEL: shuffle_v8i16_0123456z:
2844 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2845 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2847 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
2848 ret <8 x i16> %shuffle
; Two-input shuffle with undef lanes. Indices 15 and 12 select %b elements 7
; and 4, indices 3 and 5 select %a elements 3 and 5, and lanes 1, 3 and 7 are
; undef. The non-XOP runs expect a byte shift of %a, a high-word shuffle of %b
; and a dword unpack; the XOP runs expect a single vpperm.
2851 define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
2852 ; SSE-LABEL: shuffle_v8i16_fu3ucc5u:
2854 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2855 ; SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2856 ; SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2857 ; SSE-NEXT: movdqa %xmm1, %xmm0
2860 ; AVX1-LABEL: shuffle_v8i16_fu3ucc5u:
2862 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2863 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2864 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2867 ; AVX2OR512VL-LABEL: shuffle_v8i16_fu3ucc5u:
2868 ; AVX2OR512VL: # %bb.0:
2869 ; AVX2OR512VL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2870 ; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2871 ; AVX2OR512VL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2872 ; AVX2OR512VL-NEXT: retq
2874 ; XOP-LABEL: shuffle_v8i16_fu3ucc5u:
2876 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm1[14,15,10,11],xmm0[6,7,8,9],xmm1[8,9,8,9],xmm0[10,11,12,13]
2878 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
2879 ret <8 x i16> %shuffle
; Lane 0 is zero (index 8), lanes 1-6 take %a elements 0-5, and lane 7 is undef.
; The expectations below pin this to a whole-register byte shift left by 2.
2882 define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
2883 ; SSE-LABEL: shuffle_v8i16_8012345u:
2885 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2888 ; AVX-LABEL: shuffle_v8i16_8012345u:
2890 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2892 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef>
2893 ret <8 x i16> %shuffle
; Selects the even-indexed elements of the concatenation of %a and %b.
; Expected lowerings differ per run line:
;   SSE2              - shift pairs to sign-extend the low halves, then packssdw
;   SSSE3             - the same pshufb mask on both inputs, then punpcklqdq
;   SSE4.1/AVX1/AVX2  - blend the odd words with zero, then packusdw
;   AVX512VL          - concatenate into a ymm register and truncate with vpmovdw
;   XOP               - a single vpperm
2896 define <8 x i16> @shuffle_v8i16_02468ace(<8 x i16> %a, <8 x i16> %b) {
2897 ; SSE2-LABEL: shuffle_v8i16_02468ace:
2899 ; SSE2-NEXT: pslld $16, %xmm1
2900 ; SSE2-NEXT: psrad $16, %xmm1
2901 ; SSE2-NEXT: pslld $16, %xmm0
2902 ; SSE2-NEXT: psrad $16, %xmm0
2903 ; SSE2-NEXT: packssdw %xmm1, %xmm0
2906 ; SSSE3-LABEL: shuffle_v8i16_02468ace:
2908 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
2909 ; SSSE3-NEXT: pshufb %xmm2, %xmm1
2910 ; SSSE3-NEXT: pshufb %xmm2, %xmm0
2911 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2914 ; SSE41-LABEL: shuffle_v8i16_02468ace:
2916 ; SSE41-NEXT: pxor %xmm2, %xmm2
2917 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
2918 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
2919 ; SSE41-NEXT: packusdw %xmm1, %xmm0
2922 ; AVX1-LABEL: shuffle_v8i16_02468ace:
2924 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
2925 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
2926 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
2927 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2930 ; AVX2-LABEL: shuffle_v8i16_02468ace:
2932 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
2933 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
2934 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
2935 ; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2938 ; AVX512VL-LABEL: shuffle_v8i16_02468ace:
2939 ; AVX512VL: # %bb.0:
2940 ; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
2941 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2942 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
2943 ; AVX512VL-NEXT: vzeroupper
2944 ; AVX512VL-NEXT: retq
2946 ; XOP-LABEL: shuffle_v8i16_02468ace:
2948 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],xmm1[0,1,4,5,8,9,12,13]
2950 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
2951 ret <8 x i16> %shuffle
; Selects the odd-indexed elements of the concatenation of %a and %b.
; Expected lowerings differ per run line:
;   SSE2/SSSE3            - arithmetic right shift by 16, then packssdw
;   SSE4.1 and AVX runs   - logical right shift by 16, then packusdw
;   XOP                   - a single vpperm
2954 define <8 x i16> @shuffle_v8i16_13579bdf(<8 x i16> %a, <8 x i16> %b) {
2955 ; SSE2-LABEL: shuffle_v8i16_13579bdf:
2957 ; SSE2-NEXT: psrad $16, %xmm1
2958 ; SSE2-NEXT: psrad $16, %xmm0
2959 ; SSE2-NEXT: packssdw %xmm1, %xmm0
2962 ; SSSE3-LABEL: shuffle_v8i16_13579bdf:
2964 ; SSSE3-NEXT: psrad $16, %xmm1
2965 ; SSSE3-NEXT: psrad $16, %xmm0
2966 ; SSSE3-NEXT: packssdw %xmm1, %xmm0
2969 ; SSE41-LABEL: shuffle_v8i16_13579bdf:
2971 ; SSE41-NEXT: psrld $16, %xmm1
2972 ; SSE41-NEXT: psrld $16, %xmm0
2973 ; SSE41-NEXT: packusdw %xmm1, %xmm0
2976 ; AVX1-LABEL: shuffle_v8i16_13579bdf:
2978 ; AVX1-NEXT: vpsrld $16, %xmm1, %xmm1
2979 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
2980 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2983 ; AVX2OR512VL-LABEL: shuffle_v8i16_13579bdf:
2984 ; AVX2OR512VL: # %bb.0:
2985 ; AVX2OR512VL-NEXT: vpsrld $16, %xmm1, %xmm1
2986 ; AVX2OR512VL-NEXT: vpsrld $16, %xmm0, %xmm0
2987 ; AVX2OR512VL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2988 ; AVX2OR512VL-NEXT: retq
2990 ; XOP-LABEL: shuffle_v8i16_13579bdf:
2992 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[2,3,6,7,10,11,14,15],xmm1[2,3,6,7,10,11,14,15]
2994 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
2995 ret <8 x i16> %shuffle
; The zero vector is the FIRST shuffle operand here, so index 9 selects %x
; element 1 while indices 1-3 select zero; lanes 4-7 are undef.
; Runs without fast variable shuffles expect a dword splat followed by a byte
; shift right; the *-FAST runs expect a single vpshufb, and XOP expects extrq.
2999 define <8 x i16> @shuffle_v8i16_9zzzuuuu(<8 x i16> %x) {
3000 ; SSE-LABEL: shuffle_v8i16_9zzzuuuu:
3002 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3003 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3006 ; AVX1-LABEL: shuffle_v8i16_9zzzuuuu:
3008 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3009 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3012 ; AVX2-SLOW-LABEL: shuffle_v8i16_9zzzuuuu:
3013 ; AVX2-SLOW: # %bb.0:
3014 ; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0
3015 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3016 ; AVX2-SLOW-NEXT: retq
3018 ; AVX2-FAST-LABEL: shuffle_v8i16_9zzzuuuu:
3019 ; AVX2-FAST: # %bb.0:
3020 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3021 ; AVX2-FAST-NEXT: retq
3023 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_9zzzuuuu:
3024 ; AVX512VL-SLOW: # %bb.0:
3025 ; AVX512VL-SLOW-NEXT: vbroadcastss %xmm0, %xmm0
3026 ; AVX512VL-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3027 ; AVX512VL-SLOW-NEXT: retq
3029 ; AVX512VL-FAST-LABEL: shuffle_v8i16_9zzzuuuu:
3030 ; AVX512VL-FAST: # %bb.0:
3031 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3032 ; AVX512VL-FAST-NEXT: retq
3034 ; XOP-LABEL: shuffle_v8i16_9zzzuuuu:
3036 ; XOP-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
3038 %r = shufflevector <8 x i16> zeroinitializer, <8 x i16> %x, <8 x i32> <i32 9, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; Lane 0 takes %x element 2, lanes 1-3 are zero (indices 9-11 select from the
; zeroinitializer operand), and lanes 4-7 are undef.
; Runs without fast variable shuffles expect a byte shift left then right;
; the *-FAST runs expect a single vpshufb, and XOP expects extrq.
3043 define <8 x i16> @shuffle_v8i16_2zzzuuuu(<8 x i16> %x) {
3044 ; SSE-LABEL: shuffle_v8i16_2zzzuuuu:
3046 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
3047 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3050 ; AVX1-LABEL: shuffle_v8i16_2zzzuuuu:
3052 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
3053 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3056 ; AVX2-SLOW-LABEL: shuffle_v8i16_2zzzuuuu:
3057 ; AVX2-SLOW: # %bb.0:
3058 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
3059 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3060 ; AVX2-SLOW-NEXT: retq
3062 ; AVX2-FAST-LABEL: shuffle_v8i16_2zzzuuuu:
3063 ; AVX2-FAST: # %bb.0:
3064 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3065 ; AVX2-FAST-NEXT: retq
3067 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_2zzzuuuu:
3068 ; AVX512VL-SLOW: # %bb.0:
3069 ; AVX512VL-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
3070 ; AVX512VL-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3071 ; AVX512VL-SLOW-NEXT: retq
3073 ; AVX512VL-FAST-LABEL: shuffle_v8i16_2zzzuuuu:
3074 ; AVX512VL-FAST: # %bb.0:
3075 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3076 ; AVX512VL-FAST-NEXT: retq
3078 ; XOP-LABEL: shuffle_v8i16_2zzzuuuu:
3080 ; XOP-NEXT: extrq {{.*#+}} xmm0 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
3082 %r = shufflevector <8 x i16> %x, <8 x i16> zeroinitializer, <8 x i32> <i32 2, i32 9, i32 10, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
; Lanes 0 and 3 take %x elements 3 and 6, lanes 1-2 are undef, and lanes 4-7
; are zero (index 8). The undef lanes let the result be formed as %x elements
; 3-6 followed by zeros. Most runs, including XOP, expect a byte shift left by
; 2 then right by 8; the *-FAST runs expect a single vpshufb.
3086 define <8 x i16> @shuffle_v8i16_3uu6zzzz(<8 x i16> %x) {
3087 ; SSE-LABEL: shuffle_v8i16_3uu6zzzz:
3089 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3090 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
3093 ; AVX1-LABEL: shuffle_v8i16_3uu6zzzz:
3095 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3096 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
3099 ; AVX2-SLOW-LABEL: shuffle_v8i16_3uu6zzzz:
3100 ; AVX2-SLOW: # %bb.0:
3101 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3102 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
3103 ; AVX2-SLOW-NEXT: retq
3105 ; AVX2-FAST-LABEL: shuffle_v8i16_3uu6zzzz:
3106 ; AVX2-FAST: # %bb.0:
3107 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13],zero,zero,zero,zero,zero,zero,zero,zero
3108 ; AVX2-FAST-NEXT: retq
3110 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_3uu6zzzz:
3111 ; AVX512VL-SLOW: # %bb.0:
3112 ; AVX512VL-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3113 ; AVX512VL-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
3114 ; AVX512VL-SLOW-NEXT: retq
3116 ; AVX512VL-FAST-LABEL: shuffle_v8i16_3uu6zzzz:
3117 ; AVX512VL-FAST: # %bb.0:
3118 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13],zero,zero,zero,zero,zero,zero,zero,zero
3119 ; AVX512VL-FAST-NEXT: retq
3121 ; XOP-LABEL: shuffle_v8i16_3uu6zzzz:
3123 ; XOP-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3124 ; XOP-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
3126 %r = shufflevector <8 x i16> %x, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 8, i32 8, i32 8, i32 8>
; A blend written as bit masks (and/and/or on <2 x i64>) rather than as a
; shufflevector. -4294967296 is 0xFFFFFFFF00000000, so %3 keeps only the top
; 32 bits of %a; 4294967295 is 0x00000000FFFFFFFF, so %4 keeps the low 96 bits
; of %b. The SSE2 and SSSE3 runs expect two shufps; the SSE4.1 and AVX runs
; expect a single dword blend.
; NOTE(review): the name reads as lanes 0-5 from %a and lanes e,f from %b, but
; the masks (and the blend expectations) take the low lanes from %b and the top
; two lanes from %a - confirm this is intended.
3130 define <8 x i16> @mask_v8i16_012345ef(<8 x i16> %a, <8 x i16> %b) {
3131 ; SSE2-LABEL: mask_v8i16_012345ef:
3133 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
3134 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
3135 ; SSE2-NEXT: movaps %xmm1, %xmm0
3138 ; SSSE3-LABEL: mask_v8i16_012345ef:
3140 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
3141 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
3142 ; SSSE3-NEXT: movaps %xmm1, %xmm0
3145 ; SSE41-LABEL: mask_v8i16_012345ef:
3147 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
3150 ; AVX-LABEL: mask_v8i16_012345ef:
3152 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
3154 %1 = bitcast <8 x i16> %a to <2 x i64>
3155 %2 = bitcast <8 x i16> %b to <2 x i64>
3156 %3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
3157 %4 = and <2 x i64> %2, <i64 -1, i64 4294967295>
3158 %5 = or <2 x i64> %4, %3
3159 %6 = bitcast <2 x i64> %5 to <8 x i16>
; Loads an i32, inserts it into lane 0 of a zero <4 x i32>, reinterprets that as
; <8 x i16>, and splats i16 element 0. The AVX2, AVX512VL and XOP+AVX2 runs
; expect the whole sequence to fold into one vpbroadcastw from (%rdi); the other
; runs expect a scalar load followed by pshuflw and pshufd.
3163 define <8 x i16> @insert_dup_mem_v8i16_i32(ptr %ptr) {
3164 ; SSE-LABEL: insert_dup_mem_v8i16_i32:
3166 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3167 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3168 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3171 ; AVX1-LABEL: insert_dup_mem_v8i16_i32:
3173 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3174 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3175 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3178 ; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_i32:
3179 ; AVX2OR512VL: # %bb.0:
3180 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %xmm0
3181 ; AVX2OR512VL-NEXT: retq
3183 ; XOPAVX1-LABEL: insert_dup_mem_v8i16_i32:
3185 ; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3186 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3187 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3188 ; XOPAVX1-NEXT: retq
3190 ; XOPAVX2-LABEL: insert_dup_mem_v8i16_i32:
3192 ; XOPAVX2-NEXT: vpbroadcastw (%rdi), %xmm0
3193 ; XOPAVX2-NEXT: retq
3194 %tmp = load i32, ptr %ptr, align 4
3195 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
3196 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
3197 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer
; Loads an i16, sign-extends it to i32, inserts it into lane 0 of a zero
; <4 x i32>, and splats i16 element 0 of the bitcast result. Only the low 16
; bits of the extended value are splatted, and the expectations reflect that:
; a zero-extending load (movzwl) or a direct vpbroadcastw from (%rdi).
3201 define <8 x i16> @insert_dup_mem_v8i16_sext_i16(ptr %ptr) {
3202 ; SSE-LABEL: insert_dup_mem_v8i16_sext_i16:
3204 ; SSE-NEXT: movzwl (%rdi), %eax
3205 ; SSE-NEXT: movd %eax, %xmm0
3206 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3207 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3210 ; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
3212 ; AVX1-NEXT: movzwl (%rdi), %eax
3213 ; AVX1-NEXT: vmovd %eax, %xmm0
3214 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3215 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3218 ; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
3219 ; AVX2OR512VL: # %bb.0:
3220 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %xmm0
3221 ; AVX2OR512VL-NEXT: retq
3223 ; XOPAVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
3225 ; XOPAVX1-NEXT: movzwl (%rdi), %eax
3226 ; XOPAVX1-NEXT: vmovd %eax, %xmm0
3227 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3228 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3229 ; XOPAVX1-NEXT: retq
3231 ; XOPAVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
3233 ; XOPAVX2-NEXT: vpbroadcastw (%rdi), %xmm0
3234 ; XOPAVX2-NEXT: retq
3235 %tmp = load i16, ptr %ptr, align 2
3236 %tmp1 = sext i16 %tmp to i32
3237 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
3238 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
3239 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer
; Loads an i32 into lane 0 of a zero <4 x i32>, reinterprets it as <8 x i16>,
; and splats i16 element 1 (the second half of the loaded value). The AVX2,
; AVX512VL and XOP+AVX2 runs expect a single vpbroadcastw from 2(%rdi); the
; other runs expect a scalar load followed by pshuflw and pshufd.
3243 define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(ptr %ptr) {
3244 ; SSE-LABEL: insert_dup_elt1_mem_v8i16_i32:
3246 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3247 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3248 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3251 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
3253 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3254 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3255 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3258 ; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v8i16_i32:
3259 ; AVX2OR512VL: # %bb.0:
3260 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %xmm0
3261 ; AVX2OR512VL-NEXT: retq
3263 ; XOPAVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
3265 ; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3266 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3267 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3268 ; XOPAVX1-NEXT: retq
3270 ; XOPAVX2-LABEL: insert_dup_elt1_mem_v8i16_i32:
3272 ; XOPAVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
3273 ; XOPAVX2-NEXT: retq
3274 %tmp = load i32, ptr %ptr, align 4
3275 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
3276 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
3277 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Loads an i32 into lane 1 (not lane 0) of a zero <4 x i32>, reinterprets it as
; <8 x i16>, and splats i16 element 3, which is the second half of the loaded
; value. The AVX2, AVX512VL and XOP+AVX2 runs expect a single vpbroadcastw from
; 2(%rdi); SSE2 expects pshuflw plus pshufd, SSSE3 and SSE4.1 expect pshufb, and
; the AVX1 runs expect a dword broadcast load followed by word shuffles.
3281 define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(ptr %ptr) {
3282 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32:
3284 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3285 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3286 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3289 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:
3291 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3292 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
3295 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32:
3297 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3298 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
3301 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
3303 ; AVX1-NEXT: vbroadcastss (%rdi), %xmm0
3304 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
3305 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3308 ; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v8i16_i32:
3309 ; AVX2OR512VL: # %bb.0:
3310 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %xmm0
3311 ; AVX2OR512VL-NEXT: retq
3313 ; XOPAVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
3315 ; XOPAVX1-NEXT: vbroadcastss (%rdi), %xmm0
3316 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
3317 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3318 ; XOPAVX1-NEXT: retq
3320 ; XOPAVX2-LABEL: insert_dup_elt3_mem_v8i16_i32:
3322 ; XOPAVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
3323 ; XOPAVX2-NEXT: retq
3324 %tmp = load i32, ptr %ptr, align 4
3325 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
3326 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
3327 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Loads an i16, sign-extends it to i32, inserts it into lane 0 of a zero
; <4 x i32>, and splats i16 element 1 of the bitcast result. Element 1 is the
; upper half of the sign-extended value, so this cannot fold to a plain memory
; broadcast: every run keeps the sign-extending load (movswl). The AVX2,
; AVX512VL and XOP+AVX2 runs then shift right by 16 and broadcast from a
; register; the others splat word 1 with pshuflw plus pshufd.
3331 define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(ptr %ptr) {
3332 ; SSE-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
3334 ; SSE-NEXT: movswl (%rdi), %eax
3335 ; SSE-NEXT: movd %eax, %xmm0
3336 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3337 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3340 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
3342 ; AVX1-NEXT: movswl (%rdi), %eax
3343 ; AVX1-NEXT: vmovd %eax, %xmm0
3344 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3345 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3348 ; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
3350 ; AVX2-NEXT: movswl (%rdi), %eax
3351 ; AVX2-NEXT: shrl $16, %eax
3352 ; AVX2-NEXT: vmovd %eax, %xmm0
3353 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
3356 ; AVX512VL-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
3357 ; AVX512VL: # %bb.0:
3358 ; AVX512VL-NEXT: movswl (%rdi), %eax
3359 ; AVX512VL-NEXT: shrl $16, %eax
3360 ; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
3361 ; AVX512VL-NEXT: retq
3363 ; XOPAVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
3365 ; XOPAVX1-NEXT: movswl (%rdi), %eax
3366 ; XOPAVX1-NEXT: vmovd %eax, %xmm0
3367 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3368 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3369 ; XOPAVX1-NEXT: retq
3371 ; XOPAVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
3373 ; XOPAVX2-NEXT: movswl (%rdi), %eax
3374 ; XOPAVX2-NEXT: shrl $16, %eax
3375 ; XOPAVX2-NEXT: vmovd %eax, %xmm0
3376 ; XOPAVX2-NEXT: vpbroadcastw %xmm0, %xmm0
3377 ; XOPAVX2-NEXT: retq
3378 %tmp = load i16, ptr %ptr, align 2
3379 %tmp1 = sext i16 %tmp to i32
3380 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
3381 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
3382 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Loads an i16, sign-extends it to i32, inserts it into lane 1 of a zero
; <4 x i32>, and splats i16 element 3 of the bitcast result. Element 3 is the
; upper half of the sign-extended value, so every run keeps the sign-extending
; load (movswl). The AVX2, AVX512VL and XOP+AVX2 runs then shift right by 16
; and broadcast from a register; SSE2 uses pshuflw plus pshufd, and the
; remaining runs use a single pshufb.
3386 define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(ptr %ptr) {
3387 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
3389 ; SSE2-NEXT: movswl (%rdi), %eax
3390 ; SSE2-NEXT: movd %eax, %xmm0
3391 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3392 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3395 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
3397 ; SSSE3-NEXT: movswl (%rdi), %eax
3398 ; SSSE3-NEXT: movd %eax, %xmm0
3399 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
3402 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
3404 ; SSE41-NEXT: movswl (%rdi), %eax
3405 ; SSE41-NEXT: movd %eax, %xmm0
3406 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
3409 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
3411 ; AVX1-NEXT: movswl (%rdi), %eax
3412 ; AVX1-NEXT: vmovd %eax, %xmm0
3413 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
3416 ; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
3418 ; AVX2-NEXT: movswl (%rdi), %eax
3419 ; AVX2-NEXT: shrl $16, %eax
3420 ; AVX2-NEXT: vmovd %eax, %xmm0
3421 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
3424 ; AVX512VL-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
3425 ; AVX512VL: # %bb.0:
3426 ; AVX512VL-NEXT: movswl (%rdi), %eax
3427 ; AVX512VL-NEXT: shrl $16, %eax
3428 ; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
3429 ; AVX512VL-NEXT: retq
3431 ; XOPAVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
3433 ; XOPAVX1-NEXT: movswl (%rdi), %eax
3434 ; XOPAVX1-NEXT: vmovd %eax, %xmm0
3435 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
3436 ; XOPAVX1-NEXT: retq
3438 ; XOPAVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
3440 ; XOPAVX2-NEXT: movswl (%rdi), %eax
3441 ; XOPAVX2-NEXT: shrl $16, %eax
3442 ; XOPAVX2-NEXT: vmovd %eax, %xmm0
3443 ; XOPAVX2-NEXT: vpbroadcastw %xmm0, %xmm0
3444 ; XOPAVX2-NEXT: retq
3445 %tmp = load i16, ptr %ptr, align 2
3446 %tmp1 = sext i16 %tmp to i32
3447 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1
3448 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
3449 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Loads an i64 into lane 0 of a zero <2 x i64>, reinterprets it as <8 x i16>,
; and splats i16 element 0. The AVX2, AVX512VL and XOP+AVX2 runs expect a single
; vpbroadcastw from (%rdi); the other runs expect a 64-bit scalar load followed
; by pshuflw and pshufd.
3453 define <8 x i16> @insert_dup_mem_v8i16_i64(ptr %ptr) {
3454 ; SSE-LABEL: insert_dup_mem_v8i16_i64:
3456 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3457 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3458 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3461 ; AVX1-LABEL: insert_dup_mem_v8i16_i64:
3463 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
3464 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3465 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3468 ; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_i64:
3469 ; AVX2OR512VL: # %bb.0:
3470 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %xmm0
3471 ; AVX2OR512VL-NEXT: retq
3473 ; XOPAVX1-LABEL: insert_dup_mem_v8i16_i64:
3475 ; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
3476 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3477 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3478 ; XOPAVX1-NEXT: retq
3480 ; XOPAVX2-LABEL: insert_dup_mem_v8i16_i64:
3482 ; XOPAVX2-NEXT: vpbroadcastw (%rdi), %xmm0
3483 ; XOPAVX2-NEXT: retq
3484 %tmp = load i64, ptr %ptr, align 4
3485 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
3486 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
3487 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer
; Loads an i64 into lane 0 of a zero <2 x i64>, reinterprets it as <8 x i16>,
; and splats i16 element 1. The AVX2, AVX512VL and XOP+AVX2 runs expect a single
; vpbroadcastw from 2(%rdi); the other runs expect a 64-bit scalar load followed
; by pshuflw and pshufd.
3491 define <8 x i16> @insert_dup_elt1_mem_v8i16_i64(ptr %ptr) {
3492 ; SSE-LABEL: insert_dup_elt1_mem_v8i16_i64:
3494 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3495 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3496 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3499 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i64:
3501 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
3502 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3503 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3506 ; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v8i16_i64:
3507 ; AVX2OR512VL: # %bb.0:
3508 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %xmm0
3509 ; AVX2OR512VL-NEXT: retq
3511 ; XOPAVX1-LABEL: insert_dup_elt1_mem_v8i16_i64:
3513 ; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
3514 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3515 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3516 ; XOPAVX1-NEXT: retq
3518 ; XOPAVX2-LABEL: insert_dup_elt1_mem_v8i16_i64:
3520 ; XOPAVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
3521 ; XOPAVX2-NEXT: retq
3522 %tmp = load i64, ptr %ptr, align 4
3523 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
3524 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
3525 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Loads an i64 into lane 0 of a zero <2 x i64>, reinterprets it as <8 x i16>,
; and splats i16 element 3 (the last i16 of the loaded value). The AVX2,
; AVX512VL and XOP+AVX2 runs expect a single vpbroadcastw from 6(%rdi); the
; other runs expect a 64-bit scalar load followed by pshuflw and pshufd.
3529 define <8 x i16> @insert_dup_elt3_mem_v8i16_i64(ptr %ptr) {
3530 ; SSE-LABEL: insert_dup_elt3_mem_v8i16_i64:
3532 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3533 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
3534 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3537 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i64:
3539 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
3540 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
3541 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3544 ; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v8i16_i64:
3545 ; AVX2OR512VL: # %bb.0:
3546 ; AVX2OR512VL-NEXT: vpbroadcastw 6(%rdi), %xmm0
3547 ; AVX2OR512VL-NEXT: retq
3549 ; XOPAVX1-LABEL: insert_dup_elt3_mem_v8i16_i64:
3551 ; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
3552 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
3553 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3554 ; XOPAVX1-NEXT: retq
3556 ; XOPAVX2-LABEL: insert_dup_elt3_mem_v8i16_i64:
3558 ; XOPAVX2-NEXT: vpbroadcastw 6(%rdi), %xmm0
3559 ; XOPAVX2-NEXT: retq
3560 %tmp = load i64, ptr %ptr, align 4
3561 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
3562 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
3563 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Loads an i64 into lane 1 (the upper half) of a zero <2 x i64>, reinterprets it
; as <8 x i16>, and splats i16 element 7, which is the last i16 of the loaded
; value. The AVX2, AVX512VL and XOP+AVX2 runs expect a single vpbroadcastw from
; 6(%rdi). SSE2 duplicates the loaded qword before a high-word shuffle, SSSE3
; and SSE4.1 use pshufb, and the AVX1 runs use a vmovddup load followed by
; vpshufhw and vpshufd.
3567 define <8 x i16> @insert_dup_elt7_mem_v8i16_i64(ptr %ptr) {
3568 ; SSE2-LABEL: insert_dup_elt7_mem_v8i16_i64:
3570 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3571 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
3572 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
3573 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
3576 ; SSSE3-LABEL: insert_dup_elt7_mem_v8i16_i64:
3578 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3579 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
3582 ; SSE41-LABEL: insert_dup_elt7_mem_v8i16_i64:
3584 ; SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3585 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
3588 ; AVX1-LABEL: insert_dup_elt7_mem_v8i16_i64:
3590 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
3591 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
3592 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
3595 ; AVX2OR512VL-LABEL: insert_dup_elt7_mem_v8i16_i64:
3596 ; AVX2OR512VL: # %bb.0:
3597 ; AVX2OR512VL-NEXT: vpbroadcastw 6(%rdi), %xmm0
3598 ; AVX2OR512VL-NEXT: retq
3600 ; XOPAVX1-LABEL: insert_dup_elt7_mem_v8i16_i64:
3602 ; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
3603 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
3604 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
3605 ; XOPAVX1-NEXT: retq
3607 ; XOPAVX2-LABEL: insert_dup_elt7_mem_v8i16_i64:
3609 ; XOPAVX2-NEXT: vpbroadcastw 6(%rdi), %xmm0
3610 ; XOPAVX2-NEXT: retq
3611 %tmp = load i64, ptr %ptr, align 4
3612 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 1
3613 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
3614 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
3618 define <8 x i16> @insert_dup_mem_v8i16_sext_i16_i64(ptr %ptr) {
3619 ; SSE-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
3621 ; SSE-NEXT: movzwl (%rdi), %eax
3622 ; SSE-NEXT: movd %eax, %xmm0
3623 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3624 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3627 ; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
3629 ; AVX1-NEXT: movzwl (%rdi), %eax
3630 ; AVX1-NEXT: vmovd %eax, %xmm0
3631 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3632 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3635 ; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
3636 ; AVX2OR512VL: # %bb.0:
3637 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %xmm0
3638 ; AVX2OR512VL-NEXT: retq
3640 ; XOPAVX1-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
3642 ; XOPAVX1-NEXT: movzwl (%rdi), %eax
3643 ; XOPAVX1-NEXT: vmovd %eax, %xmm0
3644 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3645 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3646 ; XOPAVX1-NEXT: retq
3648 ; XOPAVX2-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
3650 ; XOPAVX2-NEXT: vpbroadcastw (%rdi), %xmm0
3651 ; XOPAVX2-NEXT: retq
3652 %tmp = load i16, ptr %ptr, align 2
3653 %tmp1 = sext i16 %tmp to i64
3654 %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %tmp1, i32 0
3655 %tmp3 = bitcast <2 x i64> %tmp2 to <8 x i16>
3656 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer