1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX2,AVX2-SLOW
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX2,AVX2-FAST
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX2,AVX2-FAST
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX512VL,AVX512VL-SLOW
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX512VL,AVX512VL-FAST
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX512VL,AVX512VL-FAST
12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=AVX,XOP,XOPAVX1
13 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=AVX,XOP,XOPAVX2
; Single-input shuffle: every mask index is below 8, so only %a is read.
; Mask <0,1,0,1,2,3,2,3> moves whole adjacent i16 pairs, and the checks expect
; one 32-bit-element shuffle with pattern [0,0,1,1] (pshufd, or vpermilps on
; the AVX runs).
15 define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
16 ; SSE-LABEL: shuffle_v8i16_01012323:
18 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
21 ; AVX-LABEL: shuffle_v8i16_01012323:
23 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
25 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
26 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <6,7,4,5,2,3,0,1>: the four adjacent
; i16 pairs are emitted in reverse order. The checks expect one
; 32-bit-element shuffle with pattern [3,2,1,0].
28 define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
29 ; SSE-LABEL: shuffle_v8i16_67452301:
31 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
34 ; AVX-LABEL: shuffle_v8i16_67452301:
36 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
38 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
39 ret <8 x i16> %shuffle
; Two-input shuffle with mask <4..11>: elements 4-7 of %a followed by
; elements 0-3 of %b (mask indices 8-11 select from the second operand).
; The baseline SSE2 run expects shufps; the SSSE3 and SSE4.1 runs expect
; palignr into xmm1 plus a register copy; the AVX runs expect one vpalignr.
41 define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
42 ; SSE2-LABEL: shuffle_v8i16_456789AB:
44 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
47 ; SSSE3-LABEL: shuffle_v8i16_456789AB:
49 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
50 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
53 ; SSE41-LABEL: shuffle_v8i16_456789AB:
55 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
56 ; SSE41-NEXT: movdqa %xmm1, %xmm0
59 ; AVX-LABEL: shuffle_v8i16_456789AB:
61 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
63 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
64 ret <8 x i16> %shuffle
; Splat of element 0 of %a into all eight lanes (mask is all zeros).
; The SSE, AVX1 and XOP+AVX1 runs expect pshuflw followed by pshufd; the
; AVX2, AVX512VL and XOP+AVX2 runs expect a single vpbroadcastw.
67 define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
68 ; SSE-LABEL: shuffle_v8i16_00000000:
70 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
71 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
74 ; AVX1-LABEL: shuffle_v8i16_00000000:
76 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
77 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
80 ; AVX2OR512VL-LABEL: shuffle_v8i16_00000000:
81 ; AVX2OR512VL: # %bb.0:
82 ; AVX2OR512VL-NEXT: vpbroadcastw %xmm0, %xmm0
83 ; AVX2OR512VL-NEXT: retq
85 ; XOPAVX1-LABEL: shuffle_v8i16_00000000:
87 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
88 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
91 ; XOPAVX2-LABEL: shuffle_v8i16_00000000:
93 ; XOPAVX2-NEXT: vpbroadcastw %xmm0, %xmm0
95 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
96 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <0,0,0,0,4,4,4,4>: element 0 fills the
; low four lanes and element 4 fills the high four lanes.
; Most runs expect pshuflw followed by pshufhw; the runs that enable the
; fast-variable-shuffle features expect a single vpshufb instead.
98 define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
99 ; SSE-LABEL: shuffle_v8i16_00004444:
101 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
102 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
105 ; AVX1-LABEL: shuffle_v8i16_00004444:
107 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
108 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
111 ; AVX2-SLOW-LABEL: shuffle_v8i16_00004444:
112 ; AVX2-SLOW: # %bb.0:
113 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
114 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
115 ; AVX2-SLOW-NEXT: retq
117 ; AVX2-FAST-LABEL: shuffle_v8i16_00004444:
118 ; AVX2-FAST: # %bb.0:
119 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9]
120 ; AVX2-FAST-NEXT: retq
122 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_00004444:
123 ; AVX512VL-SLOW: # %bb.0:
124 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
125 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
126 ; AVX512VL-SLOW-NEXT: retq
128 ; AVX512VL-FAST-LABEL: shuffle_v8i16_00004444:
129 ; AVX512VL-FAST: # %bb.0:
130 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9]
131 ; AVX512VL-FAST-NEXT: retq
133 ; XOP-LABEL: shuffle_v8i16_00004444:
135 ; XOP-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
136 ; XOP-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
138 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
139 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <u,0,u,1,u,2,u,3>: the odd lanes take
; elements 0-3 and the even lanes are undef. The checks expect the undef lanes
; to be filled by unpacking the low half of xmm0 with itself (punpcklwd).
141 define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) {
142 ; SSE-LABEL: shuffle_v8i16_u0u1u2u3:
144 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
147 ; AVX-LABEL: shuffle_v8i16_u0u1u2u3:
149 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
151 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
152 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <u,4,u,5,u,6,u,7>: the odd lanes take
; elements 4-7 and the even lanes are undef. The checks expect the undef lanes
; to be filled by unpacking the high half of xmm0 with itself (punpckhwd).
154 define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) {
155 ; SSE-LABEL: shuffle_v8i16_u4u5u6u7:
157 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
160 ; AVX-LABEL: shuffle_v8i16_u4u5u6u7:
162 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
164 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
165 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <3,1,2,0,6,7,4,5>: the low half is
; permuted element-wise and the two i16 pairs of the high half are swapped.
; Most runs expect pshuflw followed by pshufd; the runs that enable the
; fast-variable-shuffle features expect a single vpshufb instead.
167 define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
168 ; SSE-LABEL: shuffle_v8i16_31206745:
170 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
171 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
174 ; AVX1-LABEL: shuffle_v8i16_31206745:
176 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
177 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
180 ; AVX2-SLOW-LABEL: shuffle_v8i16_31206745:
181 ; AVX2-SLOW: # %bb.0:
182 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
183 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
184 ; AVX2-SLOW-NEXT: retq
186 ; AVX2-FAST-LABEL: shuffle_v8i16_31206745:
187 ; AVX2-FAST: # %bb.0:
188 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
189 ; AVX2-FAST-NEXT: retq
191 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_31206745:
192 ; AVX512VL-SLOW: # %bb.0:
193 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
194 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
195 ; AVX512VL-SLOW-NEXT: retq
197 ; AVX512VL-FAST-LABEL: shuffle_v8i16_31206745:
198 ; AVX512VL-FAST: # %bb.0:
199 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
200 ; AVX512VL-FAST-NEXT: retq
202 ; XOP-LABEL: shuffle_v8i16_31206745:
204 ; XOP-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
205 ; XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
207 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
208 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <4,4,4,4,0,0,0,0>: element 4 fills the
; low four lanes and element 0 fills the high four lanes.
; The baseline SSE2 run expects pshufd, pshuflw and pshufhw in sequence; the
; SSSE3, SSE4.1 and AVX runs expect a single byte shuffle (pshufb/vpshufb).
210 define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
211 ; SSE2-LABEL: shuffle_v8i16_44440000:
213 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
214 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
215 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
218 ; SSSE3-LABEL: shuffle_v8i16_44440000:
220 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
223 ; SSE41-LABEL: shuffle_v8i16_44440000:
225 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
228 ; AVX-LABEL: shuffle_v8i16_44440000:
230 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
232 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
233 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <2,3,0,1,6,7,4,5>: the two adjacent
; i16 pairs are swapped within each half of the vector. The checks expect one
; 32-bit-element shuffle with pattern [1,0,3,2].
235 define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) {
236 ; SSE-LABEL: shuffle_v8i16_23016745:
238 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
241 ; AVX-LABEL: shuffle_v8i16_23016745:
243 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,3,2]
245 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
246 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <2,3,0,2,6,7,4,5>: like the 23016745
; pattern but lane 3 reads element 2, so the low half is no longer a pure
; pair swap. Most runs expect pshuflw followed by pshufd; the runs that enable
; the fast-variable-shuffle features expect a single vpshufb instead.
248 define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
249 ; SSE-LABEL: shuffle_v8i16_23026745:
251 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
252 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
255 ; AVX1-LABEL: shuffle_v8i16_23026745:
257 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
258 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
261 ; AVX2-SLOW-LABEL: shuffle_v8i16_23026745:
262 ; AVX2-SLOW: # %bb.0:
263 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
264 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
265 ; AVX2-SLOW-NEXT: retq
267 ; AVX2-FAST-LABEL: shuffle_v8i16_23026745:
268 ; AVX2-FAST: # %bb.0:
269 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
270 ; AVX2-FAST-NEXT: retq
272 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_23026745:
273 ; AVX512VL-SLOW: # %bb.0:
274 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
275 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
276 ; AVX512VL-SLOW-NEXT: retq
278 ; AVX512VL-FAST-LABEL: shuffle_v8i16_23026745:
279 ; AVX512VL-FAST: # %bb.0:
280 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
281 ; AVX512VL-FAST-NEXT: retq
283 ; XOP-LABEL: shuffle_v8i16_23026745:
285 ; XOP-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
286 ; XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
288 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
289 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <2,3,0,1,6,7,4,7>: the low half is a
; pair swap, while the high half reads element 7 twice. Most runs expect
; pshufd followed by pshufhw; the runs that enable the fast-variable-shuffle
; features expect a single vpshufb instead.
291 define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
292 ; SSE-LABEL: shuffle_v8i16_23016747:
294 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
295 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
298 ; AVX1-LABEL: shuffle_v8i16_23016747:
300 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
301 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
304 ; AVX2-SLOW-LABEL: shuffle_v8i16_23016747:
305 ; AVX2-SLOW: # %bb.0:
306 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
307 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
308 ; AVX2-SLOW-NEXT: retq
310 ; AVX2-FAST-LABEL: shuffle_v8i16_23016747:
311 ; AVX2-FAST: # %bb.0:
312 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15]
313 ; AVX2-FAST-NEXT: retq
315 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_23016747:
316 ; AVX512VL-SLOW: # %bb.0:
317 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
318 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
319 ; AVX512VL-SLOW-NEXT: retq
321 ; AVX512VL-FAST-LABEL: shuffle_v8i16_23016747:
322 ; AVX512VL-FAST: # %bb.0:
323 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15]
324 ; AVX512VL-FAST-NEXT: retq
326 ; XOP-LABEL: shuffle_v8i16_23016747:
328 ; XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
329 ; XOP-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
331 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
332 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <7,5,6,4,3,1,2,0>: the low result lanes
; read only elements 4-7 and the high lanes read only elements 0-3.
; The baseline SSE2 run expects pshufd, pshuflw and pshufhw in sequence; the
; SSSE3, SSE4.1 and AVX runs expect a single byte shuffle (pshufb/vpshufb).
334 define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
335 ; SSE2-LABEL: shuffle_v8i16_75643120:
337 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
338 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
339 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
342 ; SSSE3-LABEL: shuffle_v8i16_75643120:
344 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
347 ; SSE41-LABEL: shuffle_v8i16_75643120:
349 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
352 ; AVX-LABEL: shuffle_v8i16_75643120:
354 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
356 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
357 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <1,0,5,4,5,4,1,0>: only elements 0, 1,
; 4 and 5 are read, and each result half mixes both source halves.
; The baseline SSE2 run expects pshufd, pshuflw and pshufhw in sequence; the
; SSSE3, SSE4.1 and AVX runs expect a single byte shuffle (pshufb/vpshufb).
360 define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
361 ; SSE2-LABEL: shuffle_v8i16_10545410:
363 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
364 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
365 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
368 ; SSSE3-LABEL: shuffle_v8i16_10545410:
370 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
373 ; SSE41-LABEL: shuffle_v8i16_10545410:
375 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
378 ; AVX-LABEL: shuffle_v8i16_10545410:
380 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
382 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
383 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <5,4,1,0,5,4,1,0>: the same four-lane
; pattern <5,4,1,0> is repeated in both result halves.
; The baseline SSE2 run expects pshufd, pshuflw and pshufhw in sequence; the
; SSSE3, SSE4.1 and AVX runs expect a single byte shuffle (pshufb/vpshufb).
385 define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
386 ; SSE2-LABEL: shuffle_v8i16_54105410:
388 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
389 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
390 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
393 ; SSSE3-LABEL: shuffle_v8i16_54105410:
395 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
398 ; SSE41-LABEL: shuffle_v8i16_54105410:
400 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
403 ; AVX-LABEL: shuffle_v8i16_54105410:
405 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
407 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
408 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <5,4,1,0,1,0,5,4>: only elements 0, 1,
; 4 and 5 are read, with the pair order differing between the result halves.
; The baseline SSE2 run expects pshufd, pshuflw and pshufhw in sequence; the
; SSSE3, SSE4.1 and AVX runs expect a single byte shuffle (pshufb/vpshufb).
410 define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
411 ; SSE2-LABEL: shuffle_v8i16_54101054:
413 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
414 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
415 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
418 ; SSSE3-LABEL: shuffle_v8i16_54101054:
420 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
423 ; SSE41-LABEL: shuffle_v8i16_54101054:
425 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
428 ; AVX-LABEL: shuffle_v8i16_54101054:
430 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
432 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
433 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <0,4,4,0,0,4,4,0>: only elements 0 and
; 4 are read, in the same <0,4,4,0> pattern for both result halves.
; The baseline SSE2 run expects pshufd, pshuflw and pshufhw in sequence; the
; SSSE3, SSE4.1 and AVX runs expect a single byte shuffle (pshufb/vpshufb).
435 define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
436 ; SSE2-LABEL: shuffle_v8i16_04400440:
438 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
439 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
440 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
443 ; SSSE3-LABEL: shuffle_v8i16_04400440:
445 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
448 ; SSE41-LABEL: shuffle_v8i16_04400440:
450 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
453 ; AVX-LABEL: shuffle_v8i16_04400440:
455 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
457 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
458 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <4,0,0,4,4,0,0,4>: only elements 0 and
; 4 are read, in the same <4,0,0,4> pattern for both result halves.
; The baseline SSE2 run expects pshufd, pshuflw and pshufhw in sequence; the
; SSSE3, SSE4.1 and AVX runs expect a single byte shuffle (pshufb/vpshufb).
460 define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
461 ; SSE2-LABEL: shuffle_v8i16_40044004:
463 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
464 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
465 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
468 ; SSSE3-LABEL: shuffle_v8i16_40044004:
470 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
473 ; SSE41-LABEL: shuffle_v8i16_40044004:
475 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
478 ; AVX-LABEL: shuffle_v8i16_40044004:
480 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
482 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
483 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <2,6,4,0,5,1,7,3>: a full permutation
; in which both result halves draw from both source halves.
; The baseline SSE2 run expects a five-instruction pshuflw/pshufhw/pshufd
; sequence; the SSSE3, SSE4.1 and AVX runs expect a single byte shuffle.
486 define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
487 ; SSE2-LABEL: shuffle_v8i16_26405173:
489 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
490 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
491 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
492 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
493 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
496 ; SSSE3-LABEL: shuffle_v8i16_26405173:
498 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
501 ; SSE41-LABEL: shuffle_v8i16_26405173:
503 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
506 ; AVX-LABEL: shuffle_v8i16_26405173:
508 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
510 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
511 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <2,0,6,4,5,1,7,3>: a full permutation
; in which both result halves draw from both source halves.
; The baseline SSE2 run expects a five-instruction pshuflw/pshufhw/pshufd
; sequence; the SSSE3, SSE4.1 and AVX runs expect a single byte shuffle.
513 define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
514 ; SSE2-LABEL: shuffle_v8i16_20645173:
516 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
517 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
518 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
519 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
520 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
523 ; SSSE3-LABEL: shuffle_v8i16_20645173:
525 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
528 ; SSE41-LABEL: shuffle_v8i16_20645173:
530 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
533 ; AVX-LABEL: shuffle_v8i16_20645173:
535 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
537 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
538 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <2,6,4,0,1,3,7,5>: a full permutation
; in which both result halves draw from both source halves.
; The baseline SSE2 run expects a four-instruction pshuflw/pshufhw/pshufd
; sequence; the SSSE3, SSE4.1 and AVX runs expect a single byte shuffle.
540 define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
541 ; SSE2-LABEL: shuffle_v8i16_26401375:
543 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
544 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
545 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
546 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
549 ; SSSE3-LABEL: shuffle_v8i16_26401375:
551 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
554 ; SSE41-LABEL: shuffle_v8i16_26401375:
556 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
559 ; AVX-LABEL: shuffle_v8i16_26401375:
561 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
563 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
564 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <6,6,7,5,1,6,4,3>: element 6 is read
; three times and elements 0 and 2 are not read at all.
; The baseline SSE2 run expects a five-instruction pshuflw/pshufhw/pshufd
; sequence; the SSSE3, SSE4.1 and AVX runs expect a single byte shuffle.
567 define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) {
568 ; SSE2-LABEL: shuffle_v8i16_66751643:
570 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
571 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
572 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
573 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
574 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
577 ; SSSE3-LABEL: shuffle_v8i16_66751643:
579 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
582 ; SSE41-LABEL: shuffle_v8i16_66751643:
584 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
587 ; AVX-LABEL: shuffle_v8i16_66751643:
589 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
591 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3>
592 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <6,0,5,1,4,7,5,4>. Unlike the
; neighbouring tests, the second shufflevector operand is a literal undef
; rather than %b; the mask never selects from it either way.
; The baseline SSE2 run expects a four-instruction pshufhw/pshufd/pshuflw
; sequence; the SSSE3, SSE4.1 and AVX runs expect a single byte shuffle.
595 define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) {
596 ; SSE2-LABEL: shuffle_v8i16_60514754:
598 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
599 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
600 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
601 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
604 ; SSSE3-LABEL: shuffle_v8i16_60514754:
606 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
609 ; SSE41-LABEL: shuffle_v8i16_60514754:
611 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
614 ; AVX-LABEL: shuffle_v8i16_60514754:
616 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
618 %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4>
619 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <0,0,4,4,4,4,4,4>: element 0 fills the
; first two lanes and element 4 fills the remaining six.
; The baseline SSE2 run expects pshufd, pshuflw and pshufhw in sequence; the
; SSSE3, SSE4.1 and AVX runs expect a single byte shuffle (pshufb/vpshufb).
622 define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
623 ; SSE2-LABEL: shuffle_v8i16_00444444:
625 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
626 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
627 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
630 ; SSSE3-LABEL: shuffle_v8i16_00444444:
632 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
635 ; SSE41-LABEL: shuffle_v8i16_00444444:
637 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
640 ; AVX-LABEL: shuffle_v8i16_00444444:
642 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
644 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
645 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <4,4,0,0,4,4,4,4>: element 0 lands in
; lanes 2 and 3 and element 4 fills every other lane.
; The baseline SSE2 run expects pshufd, pshuflw and pshufhw in sequence; the
; SSSE3, SSE4.1 and AVX runs expect a single byte shuffle (pshufb/vpshufb).
647 define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
648 ; SSE2-LABEL: shuffle_v8i16_44004444:
650 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
651 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
652 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
655 ; SSSE3-LABEL: shuffle_v8i16_44004444:
657 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
660 ; SSE41-LABEL: shuffle_v8i16_44004444:
662 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
665 ; AVX-LABEL: shuffle_v8i16_44004444:
667 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
669 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
670 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <0,4,4,0,4,4,4,4>: element 0 lands in
; lanes 0 and 3 and element 4 fills every other lane.
; The baseline SSE2 run expects pshufd, pshuflw and pshufhw in sequence; the
; SSSE3, SSE4.1 and AVX runs expect a single byte shuffle (pshufb/vpshufb).
672 define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
673 ; SSE2-LABEL: shuffle_v8i16_04404444:
675 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
676 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
677 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
680 ; SSSE3-LABEL: shuffle_v8i16_04404444:
682 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
685 ; SSE41-LABEL: shuffle_v8i16_04404444:
687 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
690 ; AVX-LABEL: shuffle_v8i16_04404444:
692 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
694 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
695 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <0,4,4,0,0,0,0,0>: element 4 lands in
; lanes 1 and 2 and element 0 fills every other lane.
; The baseline SSE2 run expects pshufd, pshuflw and pshufhw in sequence; the
; SSSE3, SSE4.1 and AVX runs expect a single byte shuffle (pshufb/vpshufb).
697 define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
698 ; SSE2-LABEL: shuffle_v8i16_04400000:
700 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
701 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
702 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
705 ; SSSE3-LABEL: shuffle_v8i16_04400000:
707 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
710 ; SSE41-LABEL: shuffle_v8i16_04400000:
712 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
715 ; AVX-LABEL: shuffle_v8i16_04400000:
717 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
719 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
720 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <0,4,4,0,4,5,6,7>: the high half is
; passed through unchanged while the low half mixes elements 0 and 4.
; Most runs expect pshufd followed by pshuflw; the runs that enable the
; fast-variable-shuffle features expect a single vpshufb instead.
722 define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
723 ; SSE-LABEL: shuffle_v8i16_04404567:
725 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
726 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
729 ; AVX1-LABEL: shuffle_v8i16_04404567:
731 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
732 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
735 ; AVX2-SLOW-LABEL: shuffle_v8i16_04404567:
736 ; AVX2-SLOW: # %bb.0:
737 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
738 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
739 ; AVX2-SLOW-NEXT: retq
741 ; AVX2-FAST-LABEL: shuffle_v8i16_04404567:
742 ; AVX2-FAST: # %bb.0:
743 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
744 ; AVX2-FAST-NEXT: retq
746 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_04404567:
747 ; AVX512VL-SLOW: # %bb.0:
748 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
749 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
750 ; AVX512VL-SLOW-NEXT: retq
752 ; AVX512VL-FAST-LABEL: shuffle_v8i16_04404567:
753 ; AVX512VL-FAST: # %bb.0:
754 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
755 ; AVX512VL-FAST-NEXT: retq
757 ; XOP-LABEL: shuffle_v8i16_04404567:
759 ; XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
760 ; XOP-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
762 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
763 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <0,u,4,4,4,4,4,4>: variant of the
; 00444444 pattern with lane 1 left undef.
; The baseline SSE2 run expects pshufd, pshuflw and pshufhw in sequence; the
; SSSE3, SSE4.1 and AVX runs expect a single byte shuffle (pshufb/vpshufb).
766 define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
767 ; SSE2-LABEL: shuffle_v8i16_0X444444:
769 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
770 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
771 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
774 ; SSSE3-LABEL: shuffle_v8i16_0X444444:
776 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
779 ; SSE41-LABEL: shuffle_v8i16_0X444444:
781 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
784 ; AVX-LABEL: shuffle_v8i16_0X444444:
786 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
788 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
789 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <4,4,u,0,4,4,4,4>: variant of the
; 44004444 pattern with lane 2 left undef.
; The baseline SSE2 run expects pshufd, pshuflw and pshufhw in sequence; the
; SSSE3, SSE4.1 and AVX runs expect a single byte shuffle (pshufb/vpshufb).
791 define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
792 ; SSE2-LABEL: shuffle_v8i16_44X04444:
794 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
795 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
796 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
799 ; SSSE3-LABEL: shuffle_v8i16_44X04444:
801 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
804 ; SSE41-LABEL: shuffle_v8i16_44X04444:
806 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
809 ; AVX-LABEL: shuffle_v8i16_44X04444:
811 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
813 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
814 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <u,4,4,0,4,4,4,4>: variant of the
; 04404444 pattern with lane 0 left undef. The expected instructions are the
; same as for the fully-defined 04404444 mask.
; The baseline SSE2 run expects pshufd, pshuflw and pshufhw in sequence; the
; SSSE3, SSE4.1 and AVX runs expect a single byte shuffle (pshufb/vpshufb).
816 define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
817 ; SSE2-LABEL: shuffle_v8i16_X4404444:
819 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
820 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
821 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
824 ; SSSE3-LABEL: shuffle_v8i16_X4404444:
826 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
829 ; SSE41-LABEL: shuffle_v8i16_X4404444:
831 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
834 ; AVX-LABEL: shuffle_v8i16_X4404444:
836 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
838 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
839 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <0,1,2,7,u,u,u,u>: the low half keeps
; elements 0-2 and pulls element 7 into lane 3; the high half is undef.
; The baseline SSE2 run expects pshufd, pshufhw, pshufd; the SSSE3, SSE4.1
; and AVX runs expect a single byte shuffle (pshufb/vpshufb).
842 define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
843 ; SSE2-LABEL: shuffle_v8i16_0127XXXX:
845 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
846 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
847 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
850 ; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
852 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
855 ; SSE41-LABEL: shuffle_v8i16_0127XXXX:
857 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
860 ; AVX-LABEL: shuffle_v8i16_0127XXXX:
862 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
864 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
865 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <u,u,u,u,4,5,6,3>: the high half keeps
; elements 4-6 and pulls element 3 into lane 7; the low half is undef.
; The baseline SSE2 run expects pshufd, pshuflw, pshufd; the SSSE3, SSE4.1
; and AVX runs expect a single byte shuffle (pshufb/vpshufb).
868 define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
869 ; SSE2-LABEL: shuffle_v8i16_XXXX4563:
871 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
872 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
873 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
876 ; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
878 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
881 ; SSE41-LABEL: shuffle_v8i16_XXXX4563:
883 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
886 ; AVX-LABEL: shuffle_v8i16_XXXX4563:
888 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
890 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
891 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <4,5,6,3,u,u,u,u>: the low result half
; takes elements 4-6 plus element 3; the high half is undef.
; The baseline SSE2 run expects pshufd, pshuflw, pshufd; the SSSE3, SSE4.1
; and AVX runs expect a single byte shuffle (pshufb/vpshufb).
894 define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
895 ; SSE2-LABEL: shuffle_v8i16_4563XXXX:
897 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
898 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
899 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
902 ; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
904 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
907 ; SSE41-LABEL: shuffle_v8i16_4563XXXX:
909 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
912 ; AVX-LABEL: shuffle_v8i16_4563XXXX:
914 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
916 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
917 ret <8 x i16> %shuffle
; Single-source shuffle of %a that exchanges elements 3 and 7 and leaves every
; other lane in place (mask 0,1,2,7,4,5,6,3). %b is never selected.
920 define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
921 ; SSE2-LABEL: shuffle_v8i16_01274563:
923 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
924 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
925 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
928 ; SSSE3-LABEL: shuffle_v8i16_01274563:
930 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
933 ; SSE41-LABEL: shuffle_v8i16_01274563:
935 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
938 ; AVX-LABEL: shuffle_v8i16_01274563:
940 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
942 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
943 ret <8 x i16> %shuffle
; Single-source shuffle of %a: lanes 0-3 take elements 4,5,6,3 and lanes 4-7
; take elements 0,1,2,7. %b is never selected.
946 define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
947 ; SSE2-LABEL: shuffle_v8i16_45630127:
949 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
950 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
951 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
954 ; SSSE3-LABEL: shuffle_v8i16_45630127:
956 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
959 ; SSE41-LABEL: shuffle_v8i16_45630127:
961 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
964 ; AVX-LABEL: shuffle_v8i16_45630127:
966 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
968 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
969 ret <8 x i16> %shuffle
; Single-source shuffle of %a with mask 3,7,1,0,2,7,3,5: elements 3 and 7 are
; each used twice, elements 4 and 6 are not used. %b is never selected.
972 define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
973 ; SSE2-LABEL: shuffle_v8i16_37102735:
975 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
976 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
977 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
978 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
979 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
980 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
983 ; SSSE3-LABEL: shuffle_v8i16_37102735:
985 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
988 ; SSE41-LABEL: shuffle_v8i16_37102735:
990 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
993 ; AVX-LABEL: shuffle_v8i16_37102735:
995 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
997 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
998 ret <8 x i16> %shuffle
; Two-source shuffle interleaving the low four elements of %a and %b, %a first
; (a0,b0,a1,b1,a2,b2,a3,b3). The checks expect one word-unpack-low instruction.
1001 define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
1002 ; SSE-LABEL: shuffle_v8i16_08192a3b:
1004 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1007 ; AVX-LABEL: shuffle_v8i16_08192a3b:
1009 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1011 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1012 ret <8 x i16> %shuffle
; Two-source shuffle interleaving the low four elements of %a with the high
; four elements of %b (a0,b4,a1,b5,a2,b6,a3,b7).
1015 define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
1016 ; SSE-LABEL: shuffle_v8i16_0c1d2e3f:
1018 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1019 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1022 ; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
1024 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1025 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1027 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
1028 ret <8 x i16> %shuffle
; Two-source shuffle interleaving the high four elements of %a and %b, %a first
; (a4,b4,a5,b5,a6,b6,a7,b7). The checks expect one word-unpack-high instruction.
1031 define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
1032 ; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
1034 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1037 ; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
1039 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1041 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1042 ret <8 x i16> %shuffle
; Two-source shuffle interleaving the high four elements of %a with the low
; four elements of %b (a4,b0,a5,b1,a6,b2,a7,b3).
1045 define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
1046 ; SSE-LABEL: shuffle_v8i16_48596a7b:
1048 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1049 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1052 ; AVX-LABEL: shuffle_v8i16_48596a7b:
1054 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1055 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1057 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
1058 ret <8 x i16> %shuffle
; Two-source interleave that uses the same element indices from both inputs:
; lanes 0-3 are a0,b0,a1,b1 and lanes 4-7 are a6,b6,a7,b7.
1061 define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
1062 ; SSE-LABEL: shuffle_v8i16_08196e7f:
1064 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
1065 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1066 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1069 ; AVX1-LABEL: shuffle_v8i16_08196e7f:
1071 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
1072 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1073 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1076 ; AVX2OR512VL-LABEL: shuffle_v8i16_08196e7f:
1077 ; AVX2OR512VL: # %bb.0:
1078 ; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
1079 ; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1080 ; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1081 ; AVX2OR512VL-NEXT: retq
1083 ; XOP-LABEL: shuffle_v8i16_08196e7f:
1085 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1],xmm0[2,3],xmm1[2,3],xmm0[12,13],xmm1[12,13],xmm0[14,15],xmm1[14,15]
1087 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
1088 ret <8 x i16> %shuffle
; Two-source interleave with different element indices per input: lanes 0-3
; are a0,b4,a1,b5 and lanes 4-7 are a6,b0,a7,b1.
1091 define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
1092 ; SSE-LABEL: shuffle_v8i16_0c1d6879:
1094 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
1095 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1096 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1099 ; AVX1-LABEL: shuffle_v8i16_0c1d6879:
1101 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
1102 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1103 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1106 ; AVX2OR512VL-LABEL: shuffle_v8i16_0c1d6879:
1107 ; AVX2OR512VL: # %bb.0:
1108 ; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
1109 ; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1110 ; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1111 ; AVX2OR512VL-NEXT: retq
1113 ; XOP-LABEL: shuffle_v8i16_0c1d6879:
1115 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1],xmm1[8,9],xmm0[2,3],xmm1[10,11],xmm0[12,13],xmm1[0,1],xmm0[14,15],xmm1[2,3]
1117 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
1118 ret <8 x i16> %shuffle
; Two-source shuffle producing a1,a0,b1,b0,a3,a2,b3,b2: adjacent element pairs
; from the low halves of %a and %b, alternating inputs, each pair reversed.
; The SLOW and FAST check prefixes expect different instruction sequences.
1121 define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
1122 ; SSE-LABEL: shuffle_v8i16_109832ba:
1124 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1125 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1126 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1129 ; AVX1-LABEL: shuffle_v8i16_109832ba:
1131 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1132 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1133 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1136 ; AVX2-SLOW-LABEL: shuffle_v8i16_109832ba:
1137 ; AVX2-SLOW: # %bb.0:
1138 ; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1139 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1140 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1141 ; AVX2-SLOW-NEXT: retq
1143 ; AVX2-FAST-LABEL: shuffle_v8i16_109832ba:
1144 ; AVX2-FAST: # %bb.0:
1145 ; AVX2-FAST-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1146 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11]
1147 ; AVX2-FAST-NEXT: retq
1149 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_109832ba:
1150 ; AVX512VL-SLOW: # %bb.0:
1151 ; AVX512VL-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1152 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1153 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1154 ; AVX512VL-SLOW-NEXT: retq
1156 ; AVX512VL-FAST-LABEL: shuffle_v8i16_109832ba:
1157 ; AVX512VL-FAST: # %bb.0:
1158 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [1,0,9,8,3,2,11,10]
1159 ; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1160 ; AVX512VL-FAST-NEXT: retq
1162 ; XOP-LABEL: shuffle_v8i16_109832ba:
1164 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[2,3,0,1],xmm1[2,3,0,1],xmm0[6,7,4,5],xmm1[6,7,4,5]
1166 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
1167 ret <8 x i16> %shuffle
; Two-source shuffle interleaving the low four elements of %b and %a with %b
; first (b0,a0,b1,a1,b2,a2,b3,a3), i.e. the operands of the unpack are swapped.
1170 define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) {
1171 ; SSE-LABEL: shuffle_v8i16_8091a2b3:
1173 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1174 ; SSE-NEXT: movdqa %xmm1, %xmm0
1177 ; AVX-LABEL: shuffle_v8i16_8091a2b3:
1179 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1181 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
1182 ret <8 x i16> %shuffle
; Two-source shuffle interleaving the high four elements of %b and %a with %b
; first (b4,a4,b5,a5,b6,a6,b7,a7), i.e. the operands of the unpack are swapped.
1184 define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
1185 ; SSE-LABEL: shuffle_v8i16_c4d5e6f7:
1187 ; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1188 ; SSE-NEXT: movdqa %xmm1, %xmm0
1191 ; AVX-LABEL: shuffle_v8i16_c4d5e6f7:
1193 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1195 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
1196 ret <8 x i16> %shuffle
; Two-source shuffle: lanes 0-3 are a0,a2,a1,a3 (low half of %a with its middle
; two elements swapped) and lanes 4-7 are b4,b6,b5,b7 (high half of %b with its
; middle two elements swapped).
1199 define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
1200 ; SSE2-LABEL: shuffle_v8i16_0213cedf:
1202 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
1203 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]
1204 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1207 ; SSSE3-LABEL: shuffle_v8i16_0213cedf:
1209 ; SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
1210 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]
1211 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1214 ; SSE41-LABEL: shuffle_v8i16_0213cedf:
1216 ; SSE41-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
1217 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1218 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1221 ; AVX1-LABEL: shuffle_v8i16_0213cedf:
1223 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
1224 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1225 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1228 ; AVX2-SLOW-LABEL: shuffle_v8i16_0213cedf:
1229 ; AVX2-SLOW: # %bb.0:
1230 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
1231 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1232 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1233 ; AVX2-SLOW-NEXT: retq
1235 ; AVX2-FAST-LABEL: shuffle_v8i16_0213cedf:
1236 ; AVX2-FAST: # %bb.0:
1237 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,u,u,u,u,u,u,u,u]
1238 ; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1239 ; AVX2-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1240 ; AVX2-FAST-NEXT: retq
1242 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_0213cedf:
1243 ; AVX512VL-SLOW: # %bb.0:
1244 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
1245 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1246 ; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1247 ; AVX512VL-SLOW-NEXT: retq
1249 ; AVX512VL-FAST-LABEL: shuffle_v8i16_0213cedf:
1250 ; AVX512VL-FAST: # %bb.0:
1251 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,2,1,3,12,14,13,15]
1252 ; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1253 ; AVX512VL-FAST-NEXT: retq
1255 ; XOP-LABEL: shuffle_v8i16_0213cedf:
1257 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,4,5,2,3,6,7],xmm1[8,9,12,13,10,11,14,15]
1259 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
1260 ret <8 x i16> %shuffle
; Two-source shuffle with an undef upper half: lanes 0-3 are a4,a4,a3,b2
; (mask index 10 selects element 2 of %b); lanes 4-7 are undef.
1263 define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
1264 ; SSE2-LABEL: shuffle_v8i16_443aXXXX:
1266 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535]
1267 ; SSE2-NEXT: pand %xmm2, %xmm0
1268 ; SSE2-NEXT: pandn %xmm1, %xmm2
1269 ; SSE2-NEXT: por %xmm0, %xmm2
1270 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3]
1271 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1274 ; SSSE3-LABEL: shuffle_v8i16_443aXXXX:
1276 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u]
1277 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1278 ; SSSE3-NEXT: por %xmm1, %xmm0
1281 ; SSE41-LABEL: shuffle_v8i16_443aXXXX:
1283 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1284 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1285 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1288 ; AVX1-LABEL: shuffle_v8i16_443aXXXX:
1290 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1291 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1292 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1295 ; AVX2-SLOW-LABEL: shuffle_v8i16_443aXXXX:
1296 ; AVX2-SLOW: # %bb.0:
1297 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1298 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1299 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1300 ; AVX2-SLOW-NEXT: retq
1302 ; AVX2-FAST-LABEL: shuffle_v8i16_443aXXXX:
1303 ; AVX2-FAST: # %bb.0:
1304 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1305 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15]
1306 ; AVX2-FAST-NEXT: retq
1308 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_443aXXXX:
1309 ; AVX512VL-SLOW: # %bb.0:
1310 ; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1311 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1312 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1313 ; AVX512VL-SLOW-NEXT: retq
1315 ; AVX512VL-FAST-LABEL: shuffle_v8i16_443aXXXX:
1316 ; AVX512VL-FAST: # %bb.0:
1317 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [4,4,3,10,4,5,6,7]
1318 ; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1319 ; AVX512VL-FAST-NEXT: retq
1321 ; XOP-LABEL: shuffle_v8i16_443aXXXX:
1323 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],xmm1[4,5],xmm0[8,9,10,11,12,13,14,15]
1325 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
1326 ret <8 x i16> %shuffle
; Two-source shuffle with an undef upper half: lanes 0-3 are a0,a3,a2,b5
; (mask index 13 selects element 5 of %b); lanes 4-7 are undef.
1329 define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
1330 ; SSE2-LABEL: shuffle_v8i16_032dXXXX:
1332 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1333 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,2,0]
1334 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
1335 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1336 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1339 ; SSSE3-LABEL: shuffle_v8i16_032dXXXX:
1341 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1342 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1343 ; SSSE3-NEXT: por %xmm1, %xmm0
1346 ; SSE41-LABEL: shuffle_v8i16_032dXXXX:
1348 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1349 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1352 ; AVX1-LABEL: shuffle_v8i16_032dXXXX:
1354 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1355 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1358 ; AVX2-LABEL: shuffle_v8i16_032dXXXX:
1360 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1361 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1364 ; AVX512VL-LABEL: shuffle_v8i16_032dXXXX:
1365 ; AVX512VL: # %bb.0:
1366 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,3,2,13,0,13,0,1]
1367 ; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1368 ; AVX512VL-NEXT: retq
1370 ; XOP-LABEL: shuffle_v8i16_032dXXXX:
1372 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],xmm1[10,11],xmm0[0,1],xmm1[10,11],xmm0[0,1,2,3]
1374 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1375 ret <8 x i16> %shuffle
; Shuffle with a single defined lane: lane 3 takes element 5 of %b (mask index
; 13); every other lane is undef, so %a is never selected.
1377 define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) {
1378 ; SSE-LABEL: shuffle_v8i16_XXXdXXXX:
1380 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1383 ; AVX-LABEL: shuffle_v8i16_XXXdXXXX:
1385 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[2,2,3,3]
1387 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1388 ret <8 x i16> %shuffle
; Two-source shuffle with an undef upper half: lanes 0-2 keep a0,a1,a2 in
; place and lane 3 takes element 5 of %b (mask index 13); lanes 4-7 are undef.
1391 define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
1392 ; SSE2-LABEL: shuffle_v8i16_012dXXXX:
1394 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1395 ; SSE2-NEXT: pand %xmm2, %xmm0
1396 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1397 ; SSE2-NEXT: pandn %xmm1, %xmm2
1398 ; SSE2-NEXT: por %xmm2, %xmm0
1401 ; SSSE3-LABEL: shuffle_v8i16_012dXXXX:
1403 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1404 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1405 ; SSSE3-NEXT: por %xmm1, %xmm0
1408 ; SSE41-LABEL: shuffle_v8i16_012dXXXX:
1410 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1411 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1414 ; AVX-LABEL: shuffle_v8i16_012dXXXX:
1416 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1417 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1419 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1420 ret <8 x i16> %shuffle
; Two-source shuffle with an undef lower half: lanes 4-6 keep b4,b5,b6 in
; place (mask indices 12,13,14) and lane 7 takes element 3 of %a; lanes 0-3 are
; undef.
1423 define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
1424 ; SSE2-LABEL: shuffle_v8i16_XXXXcde3:
1426 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0]
1427 ; SSE2-NEXT: pand %xmm2, %xmm1
1428 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1429 ; SSE2-NEXT: pandn %xmm0, %xmm2
1430 ; SSE2-NEXT: por %xmm1, %xmm2
1431 ; SSE2-NEXT: movdqa %xmm2, %xmm0
1434 ; SSSE3-LABEL: shuffle_v8i16_XXXXcde3:
1436 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7]
1437 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero
1438 ; SSSE3-NEXT: por %xmm1, %xmm0
1441 ; SSE41-LABEL: shuffle_v8i16_XXXXcde3:
1443 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1444 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1447 ; AVX1-LABEL: shuffle_v8i16_XXXXcde3:
1449 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1450 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1453 ; AVX2OR512VL-LABEL: shuffle_v8i16_XXXXcde3:
1454 ; AVX2OR512VL: # %bb.0:
1455 ; AVX2OR512VL-NEXT: vpbroadcastq %xmm0, %xmm0
1456 ; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1457 ; AVX2OR512VL-NEXT: retq
1459 ; XOPAVX1-LABEL: shuffle_v8i16_XXXXcde3:
1461 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1462 ; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1463 ; XOPAVX1-NEXT: retq
1465 ; XOPAVX2-LABEL: shuffle_v8i16_XXXXcde3:
1467 ; XOPAVX2-NEXT: vpbroadcastq %xmm0, %xmm0
1468 ; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1469 ; XOPAVX2-NEXT: retq
1470 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
1471 ret <8 x i16> %shuffle
; Two-source shuffle with an undef upper half: lanes 0-2 take b4,b5,b6 (mask
; indices 12,13,14) and lane 3 keeps element 3 of %a; lanes 4-7 are undef.
1474 define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
1475 ; SSE2-LABEL: shuffle_v8i16_cde3XXXX:
1477 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1478 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1479 ; SSE2-NEXT: pand %xmm2, %xmm1
1480 ; SSE2-NEXT: pandn %xmm0, %xmm2
1481 ; SSE2-NEXT: por %xmm1, %xmm2
1482 ; SSE2-NEXT: movdqa %xmm2, %xmm0
1485 ; SSSE3-LABEL: shuffle_v8i16_cde3XXXX:
1487 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u]
1488 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u]
1489 ; SSSE3-NEXT: por %xmm1, %xmm0
1492 ; SSE41-LABEL: shuffle_v8i16_cde3XXXX:
1494 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1495 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1498 ; AVX-LABEL: shuffle_v8i16_cde3XXXX:
1500 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1501 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1503 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1504 ret <8 x i16> %shuffle
; Two-source shuffle with all eight lanes defined: a0,a1,a2,b5,b4,b5,b6,a3
; (mask 0,1,2,13,12,13,14,3). Element 5 of %b is used twice.
1507 define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
1508 ; SSE2-LABEL: shuffle_v8i16_012dcde3:
1510 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1511 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,2,1]
1512 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
1513 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
1514 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1515 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
1516 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
1517 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7]
1520 ; SSSE3-LABEL: shuffle_v8i16_012dcde3:
1522 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero
1523 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
1524 ; SSSE3-NEXT: por %xmm1, %xmm0
1527 ; SSE41-LABEL: shuffle_v8i16_012dcde3:
1529 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1530 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1533 ; AVX1-LABEL: shuffle_v8i16_012dcde3:
1535 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1536 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1539 ; AVX2-LABEL: shuffle_v8i16_012dcde3:
1541 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1542 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1545 ; AVX512VL-LABEL: shuffle_v8i16_012dcde3:
1546 ; AVX512VL: # %bb.0:
1547 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,13,12,13,14,3]
1548 ; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1549 ; AVX512VL-NEXT: retq
1551 ; XOP-LABEL: shuffle_v8i16_012dcde3:
1553 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[10,11,8,9,10,11,12,13],xmm0[6,7]
1555 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
1556 ret <8 x i16> %shuffle
; Pure lane-wise blend: every result lane keeps its own position, taking lanes
; 1,4,5,6 from %b (mask indices 9,12,13,14) and lanes 0,2,3,7 from %a.
1559 define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) {
1560 ; SSE2-LABEL: shuffle_v8i16_0923cde7:
1562 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1563 ; SSE2-NEXT: andps %xmm2, %xmm0
1564 ; SSE2-NEXT: andnps %xmm1, %xmm2
1565 ; SSE2-NEXT: orps %xmm2, %xmm0
1568 ; SSSE3-LABEL: shuffle_v8i16_0923cde7:
1570 ; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1571 ; SSSE3-NEXT: andps %xmm2, %xmm0
1572 ; SSSE3-NEXT: andnps %xmm1, %xmm2
1573 ; SSSE3-NEXT: orps %xmm2, %xmm0
1576 ; SSE41-LABEL: shuffle_v8i16_0923cde7:
1578 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1581 ; AVX-LABEL: shuffle_v8i16_0923cde7:
1583 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1585 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7>
1586 ret <8 x i16> %shuffle
; Sparse two-source shuffle: lane 3 = a1, lane 5 = a5, lane 6 = a7 and
; lane 7 = b1 (mask index 9); lanes 0, 1, 2 and 4 are undef.
1589 define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
1590 ; SSE2-LABEL: shuffle_v8i16_XXX1X579:
1592 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,0,0]
1593 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0]
1594 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1595 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1596 ; SSE2-NEXT: pand %xmm1, %xmm0
1597 ; SSE2-NEXT: pandn %xmm2, %xmm1
1598 ; SSE2-NEXT: por %xmm0, %xmm1
1599 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1602 ; SSSE3-LABEL: shuffle_v8i16_XXX1X579:
1604 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3]
1605 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero
1606 ; SSSE3-NEXT: por %xmm1, %xmm0
1609 ; SSE41-LABEL: shuffle_v8i16_XXX1X579:
1611 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1612 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1613 ; SSE41-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1614 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1617 ; AVX1-LABEL: shuffle_v8i16_XXX1X579:
1619 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1620 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1621 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1622 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1625 ; AVX2-SLOW-LABEL: shuffle_v8i16_XXX1X579:
1626 ; AVX2-SLOW: # %bb.0:
1627 ; AVX2-SLOW-NEXT: vpbroadcastd %xmm1, %xmm1
1628 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1629 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1630 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1631 ; AVX2-SLOW-NEXT: retq
1633 ; AVX2-FAST-LABEL: shuffle_v8i16_XXX1X579:
1634 ; AVX2-FAST: # %bb.0:
1635 ; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
1636 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,8,9,10,11,14,15,u,u]
1637 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1638 ; AVX2-FAST-NEXT: retq
1640 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_XXX1X579:
1641 ; AVX512VL-SLOW: # %bb.0:
1642 ; AVX512VL-SLOW-NEXT: vpbroadcastd %xmm1, %xmm1
1643 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1644 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1645 ; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1646 ; AVX512VL-SLOW-NEXT: retq
1648 ; AVX512VL-FAST-LABEL: shuffle_v8i16_XXX1X579:
1649 ; AVX512VL-FAST: # %bb.0:
1650 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,4,5,7,9]
1651 ; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1652 ; AVX512VL-FAST-NEXT: retq
1654 ; XOP-LABEL: shuffle_v8i16_XXX1X579:
1656 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,8,9,10,11,14,15],xmm1[2,3]
1658 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
1659 ret <8 x i16> %shuffle
; Sparse two-source shuffle: lane 2 = a4 and lanes 4,5,6 = b0,b2,b4 (mask
; indices 8,10,12); lanes 0, 1, 3 and 7 are undef.
1662 define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
1663 ; SSE2-LABEL: shuffle_v8i16_XX4X8acX:
1665 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
1666 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1667 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
1668 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,2],xmm1[2,3]
1671 ; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
1673 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u]
1674 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u]
1675 ; SSSE3-NEXT: por %xmm1, %xmm0
1678 ; SSE41-LABEL: shuffle_v8i16_XX4X8acX:
1680 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,4,5]
1681 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1682 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1685 ; AVX1-LABEL: shuffle_v8i16_XX4X8acX:
1687 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,4,5]
1688 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1689 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1692 ; AVX2-LABEL: shuffle_v8i16_XX4X8acX:
1694 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,4,5]
1695 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1696 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1699 ; AVX512VL-LABEL: shuffle_v8i16_XX4X8acX:
1700 ; AVX512VL: # %bb.0:
1701 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,4,5,8,10,12,10]
1702 ; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
1703 ; AVX512VL-NEXT: retq
1705 ; XOP-LABEL: shuffle_v8i16_XX4X8acX:
1707 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,9,10,11,8,9,10,11],xmm1[0,1,4,5,8,9,4,5]
1709 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef>
1710 ret <8 x i16> %shuffle
; Scalar %i is inserted into element 0 of an otherwise undef vector %a. The
; shuffle then places it in lane 0 (mask index 8) and takes lanes 1-7 from the
; zeroinitializer operand, so the result is %i followed by seven zeros.
1713 define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
1714 ; SSE-LABEL: shuffle_v8i16_8zzzzzzz:
1716 ; SSE-NEXT: movzwl %di, %eax
1717 ; SSE-NEXT: movd %eax, %xmm0
1720 ; AVX-LABEL: shuffle_v8i16_8zzzzzzz:
1722 ; AVX-NEXT: movzwl %di, %eax
1723 ; AVX-NEXT: vmovd %eax, %xmm0
1725 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1726 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1727 ret <8 x i16> %shuffle
; Scalar %i is inserted into element 0 of an otherwise undef vector %a. The
; shuffle places it in lane 1 (mask index 8); every other lane selects some
; element of the zeroinitializer operand (indices 2,3,7,6,5,4,3), so all of
; them are zero regardless of which element is picked.
1730 define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
1731 ; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
1733 ; SSE-NEXT: pxor %xmm0, %xmm0
1734 ; SSE-NEXT: pinsrw $1, %edi, %xmm0
1737 ; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
1739 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1740 ; AVX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0
1742 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1743 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
1744 ret <8 x i16> %shuffle
; Scalar %i is inserted into element 0 of an otherwise undef vector %a. The
; shuffle places it in lane 5 (mask index 8); every other lane selects element
; 0 of the zeroinitializer operand and is therefore zero.
1747 define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
1748 ; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
1750 ; SSE-NEXT: pxor %xmm0, %xmm0
1751 ; SSE-NEXT: pinsrw $5, %edi, %xmm0
1754 ; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
1756 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1757 ; AVX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0
1759 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1760 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
1761 ret <8 x i16> %shuffle
; Result lane 7 receives %i (mask index 8); lanes 0, 3 and 6 read the zero
; vector and lanes 1, 2, 4, 5 are undef. The expected code treats the undef
; lanes as zero as well: zero xmm0, then pinsrw $7.
1764 define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
1765 ; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
1767 ; SSE-NEXT: pxor %xmm0, %xmm0
1768 ; SSE-NEXT: pinsrw $7, %edi, %xmm0
1771 ; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
1773 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1774 ; AVX-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0
1776 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1777 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
1778 ret <8 x i16> %shuffle
; Here %i is inserted into lane 3 of %a (not lane 0), and mask index 11
; (8 + 3) moves it to result lane 2; all other lanes read the zero vector.
; Expected: zero xmm0, then pinsrw $2 directly from the scalar register.
1781 define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
1782 ; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
1784 ; SSE-NEXT: pxor %xmm0, %xmm0
1785 ; SSE-NEXT: pinsrw $2, %edi, %xmm0
1788 ; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
1790 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1791 ; AVX-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0
1793 %a = insertelement <8 x i16> undef, i16 %i, i32 3
1794 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
1795 ret <8 x i16> %shuffle
; Two-input byte rotation: the last three elements of %b (indices 13-15)
; followed by the first five elements of %a (indices 0-4). Targets with
; palignr are expected to use a single instruction; the plain SSE2 run is
; expected to emulate it with psrldq + pslldq + por.
1798 define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) {
1799 ; SSE2-LABEL: shuffle_v8i16_def01234:
1801 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1802 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1803 ; SSE2-NEXT: por %xmm1, %xmm0
1806 ; SSSE3-LABEL: shuffle_v8i16_def01234:
1808 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1811 ; SSE41-LABEL: shuffle_v8i16_def01234:
1813 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1816 ; AVX-LABEL: shuffle_v8i16_def01234:
1818 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1820 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
1821 ret <8 x i16> %shuffle
; Partially-undef two-input rotation: only lane 1 (index 14 from %b) and
; lanes 4-6 (indices 1-3 from %a) are defined. The defined lanes fit a
; rotation by three elements, so the expected code is one palignr where
; available and psrldq + pslldq + por on the plain SSE2 run.
1824 define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) {
1825 ; SSE2-LABEL: shuffle_v8i16_ueuu123u:
1827 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1828 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1829 ; SSE2-NEXT: por %xmm1, %xmm0
1832 ; SSSE3-LABEL: shuffle_v8i16_ueuu123u:
1834 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1837 ; SSE41-LABEL: shuffle_v8i16_ueuu123u:
1839 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1842 ; AVX-LABEL: shuffle_v8i16_ueuu123u:
1844 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1846 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1847 ret <8 x i16> %shuffle
; Single-input rotation: every mask index is below 8, so only %a is read
; (elements 5,6,7 then 0-4) and %b is unused. Expected: palignr with xmm0 as
; both sources where available; the plain SSE2 run copies xmm0 first and then
; combines psrldq + pslldq with por.
1850 define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) {
1851 ; SSE2-LABEL: shuffle_v8i16_56701234:
1853 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1854 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1855 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1856 ; SSE2-NEXT: por %xmm1, %xmm0
1859 ; SSSE3-LABEL: shuffle_v8i16_56701234:
1861 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1864 ; SSE41-LABEL: shuffle_v8i16_56701234:
1866 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1869 ; AVX-LABEL: shuffle_v8i16_56701234:
1871 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1873 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
1874 ret <8 x i16> %shuffle
; Partially-undef single-input rotation: only lane 1 (index 6) and lanes 4-6
; (indices 1-3) are defined, all from %a. The defined lanes fit a rotation by
; three elements, so the expected code is a self-palignr where available and
; movdqa + psrldq + pslldq + por on the plain SSE2 run.
1877 define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) {
1878 ; SSE2-LABEL: shuffle_v8i16_u6uu123u:
1880 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1881 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1882 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1883 ; SSE2-NEXT: por %xmm1, %xmm0
1886 ; SSSE3-LABEL: shuffle_v8i16_u6uu123u:
1888 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1891 ; SSE41-LABEL: shuffle_v8i16_u6uu123u:
1893 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1896 ; AVX-LABEL: shuffle_v8i16_u6uu123u:
1898 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1900 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1901 ret <8 x i16> %shuffle
; Only lanes 4-6 are defined (elements 1-3 of %a); everything else is undef.
; That fits moving %a up by three elements, so a single byte shift
; (pslldq / vpslldq by 6 bytes) is expected on every run.
1904 define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) {
1905 ; SSE-LABEL: shuffle_v8i16_uuuu123u:
1907 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1910 ; AVX-LABEL: shuffle_v8i16_uuuu123u:
1912 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1914 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1915 ret <8 x i16> %shuffle
; Two-input byte rotation: the last five elements of %b (indices 11-15)
; followed by the first three elements of %a (indices 0-2). Expected: one
; palignr where available, psrldq + pslldq + por on the plain SSE2 run.
1918 define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) {
1919 ; SSE2-LABEL: shuffle_v8i16_bcdef012:
1921 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1922 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1923 ; SSE2-NEXT: por %xmm1, %xmm0
1926 ; SSSE3-LABEL: shuffle_v8i16_bcdef012:
1928 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1931 ; SSE41-LABEL: shuffle_v8i16_bcdef012:
1933 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1936 ; AVX-LABEL: shuffle_v8i16_bcdef012:
1938 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1940 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2>
1941 ret <8 x i16> %shuffle
; Partially-undef two-input rotation: lanes 1-3 take indices 12-14 from %b
; and lane 6 takes index 1 from %a; the rest are undef. The defined lanes fit
; a rotation by five elements, so the expected code is one palignr where
; available and psrldq + pslldq + por on the plain SSE2 run.
1944 define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) {
1945 ; SSE2-LABEL: shuffle_v8i16_ucdeuu1u:
1947 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1948 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1949 ; SSE2-NEXT: por %xmm1, %xmm0
1952 ; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u:
1954 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1957 ; SSE41-LABEL: shuffle_v8i16_ucdeuu1u:
1959 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1962 ; AVX-LABEL: shuffle_v8i16_ucdeuu1u:
1964 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1966 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef>
1967 ret <8 x i16> %shuffle
; Single-input rotation: elements 3-7 of %a followed by elements 0-2 of %a;
; %b is unused. Expected: a self-palignr where available, and
; movdqa + psrldq + pslldq + por on the plain SSE2 run.
1970 define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) {
1971 ; SSE2-LABEL: shuffle_v8i16_34567012:
1973 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1974 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1975 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1976 ; SSE2-NEXT: por %xmm1, %xmm0
1979 ; SSSE3-LABEL: shuffle_v8i16_34567012:
1981 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1984 ; SSE41-LABEL: shuffle_v8i16_34567012:
1986 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1989 ; AVX-LABEL: shuffle_v8i16_34567012:
1991 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1993 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>
1994 ret <8 x i16> %shuffle
; Partially-undef single-input rotation: lanes 1-3 take elements 4-6 of %a
; and lane 6 takes element 1 of %a; the rest are undef. The defined lanes fit
; a rotation by three elements, so the expected code is a self-palignr where
; available and movdqa + psrldq + pslldq + por on the plain SSE2 run.
1997 define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) {
1998 ; SSE2-LABEL: shuffle_v8i16_u456uu1u:
2000 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2001 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2002 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2003 ; SSE2-NEXT: por %xmm1, %xmm0
2006 ; SSSE3-LABEL: shuffle_v8i16_u456uu1u:
2008 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
2011 ; SSE41-LABEL: shuffle_v8i16_u456uu1u:
2013 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
2016 ; AVX-LABEL: shuffle_v8i16_u456uu1u:
2018 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
2020 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef>
2021 ret <8 x i16> %shuffle
; Only lanes 1-3 are defined (elements 4-6 of %a); everything else is undef.
; That fits moving %a down by three elements, so a single byte shift
; (psrldq / vpsrldq by 6 bytes) is expected on every run.
2024 define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) {
2025 ; SSE-LABEL: shuffle_v8i16_u456uuuu:
2027 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2030 ; AVX-LABEL: shuffle_v8i16_u456uuuu:
2032 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2034 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
2035 ret <8 x i16> %shuffle
; Two-input byte rotation in the other operand order: the last five elements
; of %a (indices 3-7) followed by the first three of %b (indices 8-10).
; The two-operand palignr writes xmm1 and needs a movdqa back to xmm0; the
; three-operand vpalignr writes xmm0 directly. The plain SSE2 run is expected
; to use psrldq + pslldq + por.
2038 define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) {
2039 ; SSE2-LABEL: shuffle_v8i16_3456789a:
2041 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2042 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
2043 ; SSE2-NEXT: por %xmm1, %xmm0
2046 ; SSSE3-LABEL: shuffle_v8i16_3456789a:
2048 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
2049 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2052 ; SSE41-LABEL: shuffle_v8i16_3456789a:
2054 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
2055 ; SSE41-NEXT: movdqa %xmm1, %xmm0
2058 ; AVX-LABEL: shuffle_v8i16_3456789a:
2060 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
2062 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
2063 ret <8 x i16> %shuffle
; Partially-undef two-input rotation: lanes 1-3 take elements 4-6 of %a and
; lane 6 takes index 9 (element 1 of %b); the rest are undef. The defined
; lanes fit a rotation by three elements across %a then %b, so the expected
; code is palignr (+ movdqa for the two-operand form) where available and
; psrldq + pslldq + por on the plain SSE2 run.
2066 define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) {
2067 ; SSE2-LABEL: shuffle_v8i16_u456uu9u:
2069 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2070 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
2071 ; SSE2-NEXT: por %xmm1, %xmm0
2074 ; SSSE3-LABEL: shuffle_v8i16_u456uu9u:
2076 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
2077 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2080 ; SSE41-LABEL: shuffle_v8i16_u456uu9u:
2082 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
2083 ; SSE41-NEXT: movdqa %xmm1, %xmm0
2086 ; AVX-LABEL: shuffle_v8i16_u456uu9u:
2088 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
2090 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef>
2091 ret <8 x i16> %shuffle
; Two-input byte rotation: the last three elements of %a (indices 5-7)
; followed by the first five of %b (indices 8-12). Expected: palignr
; (+ movdqa for the two-operand form) where available, and
; psrldq + pslldq + por on the plain SSE2 run.
2094 define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) {
2095 ; SSE2-LABEL: shuffle_v8i16_56789abc:
2097 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2098 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
2099 ; SSE2-NEXT: por %xmm1, %xmm0
2102 ; SSSE3-LABEL: shuffle_v8i16_56789abc:
2104 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2105 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2108 ; SSE41-LABEL: shuffle_v8i16_56789abc:
2110 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2111 ; SSE41-NEXT: movdqa %xmm1, %xmm0
2114 ; AVX-LABEL: shuffle_v8i16_56789abc:
2116 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2118 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
2119 ret <8 x i16> %shuffle
; Partially-undef two-input rotation: lane 1 takes element 6 of %a and
; lanes 4-6 take indices 9-11 (elements 1-3 of %b); the rest are undef. The
; defined lanes fit a rotation by five elements across %a then %b, so the
; expected code is palignr (+ movdqa for the two-operand form) where
; available and psrldq + pslldq + por on the plain SSE2 run.
2122 define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) {
2123 ; SSE2-LABEL: shuffle_v8i16_u6uu9abu:
2125 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2126 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
2127 ; SSE2-NEXT: por %xmm1, %xmm0
2130 ; SSSE3-LABEL: shuffle_v8i16_u6uu9abu:
2132 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2133 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2136 ; SSE41-LABEL: shuffle_v8i16_u6uu9abu:
2138 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2139 ; SSE41-NEXT: movdqa %xmm1, %xmm0
2142 ; AVX-LABEL: shuffle_v8i16_u6uu9abu:
2144 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2146 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
2147 ret <8 x i16> %shuffle
; Only lanes 0 and 4 are defined (elements 0 and 1 of %a); the rest are
; undef. With SSE4.1 or AVX the undef lanes may be zero, so a 16-to-64-bit
; zero extension (pmovzxwq) is expected; without it the expected code is
; pshufd + pshufhw.
2150 define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) {
2151 ; SSE2-LABEL: shuffle_v8i16_0uuu1uuu:
2153 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2154 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2157 ; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu:
2159 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2160 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2163 ; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
2165 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2168 ; AVX-LABEL: shuffle_v8i16_0uuu1uuu:
2170 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2172 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
2173 ret <8 x i16> %shuffle
; Elements 0 and 1 of %a land in lanes 0 and 4; every other lane reads the
; zero vector (indices 9-11 and 13-15). This is a 16-to-64-bit zero
; extension, so pmovzxwq is expected with SSE4.1 or AVX; otherwise the
; expected code unpacks against a zeroed register (punpcklwd, punpckldq).
2176 define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) {
2177 ; SSE2-LABEL: shuffle_v8i16_0zzz1zzz:
2179 ; SSE2-NEXT: pxor %xmm1, %xmm1
2180 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2181 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2184 ; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz:
2186 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2187 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2188 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2191 ; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
2193 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2196 ; AVX-LABEL: shuffle_v8i16_0zzz1zzz:
2198 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2200 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
2201 ret <8 x i16> %shuffle
; Elements 0-3 of %a land in the even lanes; the odd lanes are undef. With
; SSE4.1 or AVX a 16-to-32-bit zero extension (pmovzxwd) is expected;
; otherwise the expected code is punpcklwd of xmm0 with itself.
2204 define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) {
2205 ; SSE2-LABEL: shuffle_v8i16_0u1u2u3u:
2207 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2210 ; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u:
2212 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2215 ; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
2217 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2220 ; AVX-LABEL: shuffle_v8i16_0u1u2u3u:
2222 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2224 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
2225 ret <8 x i16> %shuffle
; Elements 0-3 of %a land in the even lanes and the odd lanes read the zero
; vector (indices 9, 11, 13, 15): a 16-to-32-bit zero extension. pmovzxwd is
; expected with SSE4.1 or AVX; otherwise pxor + punpcklwd against zero.
2228 define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
2229 ; SSE2-LABEL: shuffle_v8i16_0z1z2z3z:
2231 ; SSE2-NEXT: pxor %xmm1, %xmm1
2232 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2235 ; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z:
2237 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2238 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2241 ; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
2243 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2246 ; AVX-LABEL: shuffle_v8i16_0z1z2z3z:
2248 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2250 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
2251 ret <8 x i16> %shuffle
; The four-element pattern 0,1,1,0 from %a is repeated in both halves; the
; zero second operand is never selected. Expected lowering is pshuflw to
; build the pattern in the low half followed by a 64-bit duplicate (pshufd,
; or vpbroadcastq when AVX2 is present); the runs with fast variable
; shuffles expect a single vpshufb instead.
2254 define <8 x i16> @shuffle_v8i16_01100110(<8 x i16> %a) {
2255 ; SSE-LABEL: shuffle_v8i16_01100110:
2257 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2258 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2261 ; AVX1-LABEL: shuffle_v8i16_01100110:
2263 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2264 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2267 ; AVX2-SLOW-LABEL: shuffle_v8i16_01100110:
2268 ; AVX2-SLOW: # %bb.0:
2269 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2270 ; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
2271 ; AVX2-SLOW-NEXT: retq
2273 ; AVX2-FAST-LABEL: shuffle_v8i16_01100110:
2274 ; AVX2-FAST: # %bb.0:
2275 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2276 ; AVX2-FAST-NEXT: retq
2278 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_01100110:
2279 ; AVX512VL-SLOW: # %bb.0:
2280 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2281 ; AVX512VL-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
2282 ; AVX512VL-SLOW-NEXT: retq
2284 ; AVX512VL-FAST-LABEL: shuffle_v8i16_01100110:
2285 ; AVX512VL-FAST: # %bb.0:
2286 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2287 ; AVX512VL-FAST-NEXT: retq
2289 ; XOPAVX1-LABEL: shuffle_v8i16_01100110:
2291 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2292 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2293 ; XOPAVX1-NEXT: retq
2295 ; XOPAVX2-LABEL: shuffle_v8i16_01100110:
2297 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2298 ; XOPAVX2-NEXT: vpbroadcastq %xmm0, %xmm0
2299 ; XOPAVX2-NEXT: retq
2300 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 1, i32 0>
2301 ret <8 x i16> %shuffle
; The repeated 0,1,1,0 pattern from %a with lanes 2 and 4 left undef. The
; undef lanes are filled so that both halves match, and the expected code is
; pshuflw plus a 64-bit duplicate (pshufd, or vpbroadcastq when AVX2 is
; present); the runs with fast variable shuffles expect a single vpshufb.
2304 define <8 x i16> @shuffle_v8i16_01u0u110(<8 x i16> %a) {
2305 ; SSE-LABEL: shuffle_v8i16_01u0u110:
2307 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2308 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2311 ; AVX1-LABEL: shuffle_v8i16_01u0u110:
2313 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2314 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2317 ; AVX2-SLOW-LABEL: shuffle_v8i16_01u0u110:
2318 ; AVX2-SLOW: # %bb.0:
2319 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2320 ; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
2321 ; AVX2-SLOW-NEXT: retq
2323 ; AVX2-FAST-LABEL: shuffle_v8i16_01u0u110:
2324 ; AVX2-FAST: # %bb.0:
2325 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2326 ; AVX2-FAST-NEXT: retq
2328 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_01u0u110:
2329 ; AVX512VL-SLOW: # %bb.0:
2330 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2331 ; AVX512VL-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
2332 ; AVX512VL-SLOW-NEXT: retq
2334 ; AVX512VL-FAST-LABEL: shuffle_v8i16_01u0u110:
2335 ; AVX512VL-FAST: # %bb.0:
2336 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2337 ; AVX512VL-FAST-NEXT: retq
2339 ; XOPAVX1-LABEL: shuffle_v8i16_01u0u110:
2341 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2342 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2343 ; XOPAVX1-NEXT: retq
2345 ; XOPAVX2-LABEL: shuffle_v8i16_01u0u110:
2347 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2348 ; XOPAVX2-NEXT: vpbroadcastq %xmm0, %xmm0
2349 ; XOPAVX2-NEXT: retq
2350 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 0, i32 undef, i32 1, i32 1, i32 0>
2351 ret <8 x i16> %shuffle
; All defined lanes read the high half of %a: 4,6,7 in lanes 0-2 and 6,7,5 in
; lanes 5-7, with lanes 3 and 4 undef. The expected code permutes the high
; half to 4,6,7,5 with pshufhw and copies it into both halves with
; pshufd [2,3,2,3]; the runs with fast variable shuffles expect one vpshufb.
2354 define <8 x i16> @shuffle_v8i16_467uu675(<8 x i16> %a) {
2355 ; SSE-LABEL: shuffle_v8i16_467uu675:
2357 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2358 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2361 ; AVX1-LABEL: shuffle_v8i16_467uu675:
2363 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2364 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2367 ; AVX2-SLOW-LABEL: shuffle_v8i16_467uu675:
2368 ; AVX2-SLOW: # %bb.0:
2369 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2370 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2371 ; AVX2-SLOW-NEXT: retq
2373 ; AVX2-FAST-LABEL: shuffle_v8i16_467uu675:
2374 ; AVX2-FAST: # %bb.0:
2375 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11]
2376 ; AVX2-FAST-NEXT: retq
2378 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_467uu675:
2379 ; AVX512VL-SLOW: # %bb.0:
2380 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2381 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2382 ; AVX512VL-SLOW-NEXT: retq
2384 ; AVX512VL-FAST-LABEL: shuffle_v8i16_467uu675:
2385 ; AVX512VL-FAST: # %bb.0:
2386 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11]
2387 ; AVX512VL-FAST-NEXT: retq
2389 ; XOP-LABEL: shuffle_v8i16_467uu675:
2391 ; XOP-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2392 ; XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2394 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7, i32 5>
2395 ret <8 x i16> %shuffle
; Swaps each adjacent pair of elements of %a, which equals rotating every
; 32-bit lane by 16 bits. AVX512VL and XOP are expected to use a single
; rotate (vprold / vprotd by 16); the run with fast variable shuffles on AVX2
; expects vpshufb; the remaining runs expect pshuflw + pshufhw.
2398 define <8 x i16> @shuffle_v8i16_10325476(<8 x i16> %a) {
2399 ; SSE-LABEL: shuffle_v8i16_10325476:
2401 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
2402 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
2405 ; AVX1-LABEL: shuffle_v8i16_10325476:
2407 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
2408 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
2411 ; AVX2-SLOW-LABEL: shuffle_v8i16_10325476:
2412 ; AVX2-SLOW: # %bb.0:
2413 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
2414 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
2415 ; AVX2-SLOW-NEXT: retq
2417 ; AVX2-FAST-LABEL: shuffle_v8i16_10325476:
2418 ; AVX2-FAST: # %bb.0:
2419 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
2420 ; AVX2-FAST-NEXT: retq
2422 ; AVX512VL-LABEL: shuffle_v8i16_10325476:
2423 ; AVX512VL: # %bb.0:
2424 ; AVX512VL-NEXT: vprold $16, %xmm0, %xmm0
2425 ; AVX512VL-NEXT: retq
2427 ; XOP-LABEL: shuffle_v8i16_10325476:
2429 ; XOP-NEXT: vprotd $16, %xmm0, %xmm0
2431 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
2432 ret <8 x i16> %shuffle
; Rotates the four elements inside each 64-bit half of %a by one position
; (1,2,3,0 and 5,6,7,4). AVX512VL and XOP are expected to use a single 64-bit
; rotate by 48 (vprolq / vprotq); the run with fast variable shuffles on AVX2
; expects vpshufb; the remaining runs expect pshuflw + pshufhw.
2435 define <8 x i16> @shuffle_v8i16_12305674(<8 x i16> %a) {
2436 ; SSE-LABEL: shuffle_v8i16_12305674:
2438 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
2439 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
2442 ; AVX1-LABEL: shuffle_v8i16_12305674:
2444 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
2445 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
2448 ; AVX2-SLOW-LABEL: shuffle_v8i16_12305674:
2449 ; AVX2-SLOW: # %bb.0:
2450 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
2451 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
2452 ; AVX2-SLOW-NEXT: retq
2454 ; AVX2-FAST-LABEL: shuffle_v8i16_12305674:
2455 ; AVX2-FAST: # %bb.0:
2456 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9]
2457 ; AVX2-FAST-NEXT: retq
2459 ; AVX512VL-LABEL: shuffle_v8i16_12305674:
2460 ; AVX512VL: # %bb.0:
2461 ; AVX512VL-NEXT: vprolq $48, %xmm0, %xmm0
2462 ; AVX512VL-NEXT: retq
2464 ; XOP-LABEL: shuffle_v8i16_12305674:
2466 ; XOP-NEXT: vprotq $48, %xmm0, %xmm0
2468 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
2469 ret <8 x i16> %shuffle
; Gathers the even elements of %a (0,2,4,6) and repeats them in both halves.
; Targets with pshufb are expected to use one byte shuffle; the plain SSE2
; run is expected to need a five-step pshuflw/pshufhw/pshufd chain.
2472 define <8 x i16> @shuffle_v8i16_02460246(<8 x i16> %a) {
2473 ; SSE2-LABEL: shuffle_v8i16_02460246:
2475 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
2476 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
2477 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
2478 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7]
2479 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,4,5]
2482 ; SSSE3-LABEL: shuffle_v8i16_02460246:
2484 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13]
2487 ; SSE41-LABEL: shuffle_v8i16_02460246:
2489 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13]
2492 ; AVX-LABEL: shuffle_v8i16_02460246:
2494 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13]
2496 %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 0, i32 2, i32 4, i32 6>
2497 ret <8 x i16> %shuffle
; Alternates elements 0 and 4 of %a across all eight lanes. Targets with
; pshufb are expected to use one byte shuffle; the plain SSE2 run is expected
; to use pshufd + pshuflw + pshufhw.
2500 define <8 x i16> @shuffle_v8i16_04040404(<8 x i16> %a) {
2501 ; SSE2-LABEL: shuffle_v8i16_04040404:
2503 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
2504 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
2505 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,6,4]
2508 ; SSSE3-LABEL: shuffle_v8i16_04040404:
2510 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,0,1,8,9,0,1,8,9,0,1,8,9]
2513 ; SSE41-LABEL: shuffle_v8i16_04040404:
2515 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,0,1,8,9,0,1,8,9,0,1,8,9]
2518 ; AVX-LABEL: shuffle_v8i16_04040404:
2520 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,0,1,8,9,0,1,8,9,0,1,8,9]
2522 %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 4, i32 0, i32 4, i32 0, i32 4, i32 0, i32 4>
2523 ret <8 x i16> %shuffle
; Takes the even elements of %a followed by the even elements of %b, i.e. the
; low 16 bits of every 32-bit lane of the two inputs. Expected lowering per
; run: plain SSE2 compacts each input with a pshuflw/pshufhw/pshufd chain and
; joins them with punpcklqdq; SSSE3 uses one shared pshufb mask per input
; plus punpcklqdq; SSE4.1, AVX1 and AVX2 zero the odd elements with a blend
; and pack with packusdw; AVX512VL concatenates into ymm0 and truncates with
; vpmovdw; XOP uses a single vpperm.
2526 define <8 x i16> @shuffle_v8i16_02468ACE(<8 x i16> %a, <8 x i16> %b) {
2527 ; SSE2-LABEL: shuffle_v8i16_02468ACE:
2529 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
2530 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
2531 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2532 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2533 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
2534 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2535 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2538 ; SSSE3-LABEL: shuffle_v8i16_02468ACE:
2540 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
2541 ; SSSE3-NEXT: pshufb %xmm2, %xmm1
2542 ; SSSE3-NEXT: pshufb %xmm2, %xmm0
2543 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2546 ; SSE41-LABEL: shuffle_v8i16_02468ACE:
2548 ; SSE41-NEXT: pxor %xmm2, %xmm2
2549 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
2550 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
2551 ; SSE41-NEXT: packusdw %xmm1, %xmm0
2554 ; AVX1-LABEL: shuffle_v8i16_02468ACE:
2556 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
2557 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
2558 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
2559 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2562 ; AVX2-LABEL: shuffle_v8i16_02468ACE:
2564 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
2565 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
2566 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
2567 ; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2570 ; AVX512VL-LABEL: shuffle_v8i16_02468ACE:
2571 ; AVX512VL: # %bb.0:
2572 ; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
2573 ; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2574 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
2575 ; AVX512VL-NEXT: vzeroupper
2576 ; AVX512VL-NEXT: retq
2578 ; XOP-LABEL: shuffle_v8i16_02468ACE:
2580 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],xmm1[0,1,4,5,8,9,12,13]
2582 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
2583 ret <8 x i16> %shuffle
; Takes every fourth element of the %a:%b concatenation (0,4 from %a and
; 8,12 from %b) and repeats that group in both halves. Expected lowering per
; run: SSE2 and SSSE3 compact each input with pshufd + pshuflw and interleave
; with punpckldq; SSE4.1, AVX1 and AVX2 zero the unwanted elements with a
; blend and pack twice with packusdw; AVX512VL uses vpermt2w with a broadcast
; index constant (3377734080528384 = 0x000C000800040000, i.e. element
; indices 0,4,8,12); XOP uses a single vpperm.
2586 define <8 x i16> @shuffle_v8i16_048C048C(<8 x i16> %a, <8 x i16> %b) {
2587 ; SSE2-LABEL: shuffle_v8i16_048C048C:
2589 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2590 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,0,2,4,5,6,7]
2591 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2592 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
2593 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2596 ; SSSE3-LABEL: shuffle_v8i16_048C048C:
2598 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2599 ; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,0,2,4,5,6,7]
2600 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2601 ; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
2602 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2605 ; SSE41-LABEL: shuffle_v8i16_048C048C:
2607 ; SSE41-NEXT: pxor %xmm2, %xmm2
2608 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
2609 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
2610 ; SSE41-NEXT: packusdw %xmm1, %xmm0
2611 ; SSE41-NEXT: packusdw %xmm0, %xmm0
2614 ; AVX1-LABEL: shuffle_v8i16_048C048C:
2616 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
2617 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
2618 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
2619 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2620 ; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2623 ; AVX2-LABEL: shuffle_v8i16_048C048C:
2625 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
2626 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
2627 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
2628 ; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2629 ; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2632 ; AVX512VL-LABEL: shuffle_v8i16_048C048C:
2633 ; AVX512VL: # %bb.0:
2634 ; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [3377734080528384,3377734080528384]
2635 ; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
2636 ; AVX512VL-NEXT: retq
2638 ; XOP-LABEL: shuffle_v8i16_048C048C:
2640 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,8,9],xmm1[0,1,8,9],xmm0[0,1,8,9],xmm1[0,1,8,9]
2642 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 0, i32 4, i32 8, i32 12>
2643 ret <8 x i16> %shuffle
2647 ; Shuffle to logical bit shifts
; Even lanes read the zero vector (index 8) and odd lanes take elements
; 0,2,4,6 of %a, which equals shifting every 32-bit lane left by 16 bits.
; A single pslld / vpslld $16 is expected.
2649 define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) {
2650 ; SSE-LABEL: shuffle_v8i16_z0z2z4z6:
2652 ; SSE-NEXT: pslld $16, %xmm0
2655 ; AVX-LABEL: shuffle_v8i16_z0z2z4z6:
2657 ; AVX-NEXT: vpslld $16, %xmm0, %xmm0
2659 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
2660 ret <8 x i16> %shuffle
; Lanes 3 and 7 take elements 0 and 4 of %a and every other lane reads the
; zero vector, which equals shifting each 64-bit half left by 48 bits.
; A single psllq / vpsllq $48 is expected.
2663 define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) {
2664 ; SSE-LABEL: shuffle_v8i16_zzz0zzz4:
2666 ; SSE-NEXT: psllq $48, %xmm0
2669 ; AVX-LABEL: shuffle_v8i16_zzz0zzz4:
2671 ; AVX-NEXT: vpsllq $48, %xmm0, %xmm0
2673 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
2674 ret <8 x i16> %shuffle
; Mask <8,8,0,1,8,undef,4,undef>: the defined lanes match a 64-bit left shift by
; two 16-bit lanes (zeros shifted in at the bottom of each half); the undef lanes
; leave the lowering free to pick that shift.
; The checks expect a single psllq / vpsllq by 32.
2677 define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) {
2678 ; SSE-LABEL: shuffle_v8i16_zz01zX4X:
2680 ; SSE-NEXT: psllq $32, %xmm0
2683 ; AVX-LABEL: shuffle_v8i16_zz01zX4X:
2685 ; AVX-NEXT: vpsllq $32, %xmm0, %xmm0
2687 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef>
2688 ret <8 x i16> %shuffle
; Mask <8,0,undef,2,8,4,5,6>: the defined lanes match a 64-bit left shift by one
; 16-bit lane (zero shifted in at lanes 0 and 4); lane 2 is undef.
; The checks expect a single psllq / vpsllq by 16.
2691 define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) {
2692 ; SSE-LABEL: shuffle_v8i16_z0X2z456:
2694 ; SSE-NEXT: psllq $16, %xmm0
2697 ; AVX-LABEL: shuffle_v8i16_z0X2z456:
2699 ; AVX-NEXT: vpsllq $16, %xmm0, %xmm0
2701 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6>
2702 ret <8 x i16> %shuffle
; Mask <1,8,3,8,undef,8,7,8>: each even result lane takes the odd lane of %a
; above it and each odd result lane is zero (index 8); lane 4 is undef.
; The checks expect a single 32-bit logical right shift by 16 (psrld / vpsrld).
2705 define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) {
2706 ; SSE-LABEL: shuffle_v8i16_1z3zXz7z:
2708 ; SSE-NEXT: psrld $16, %xmm0
2711 ; AVX-LABEL: shuffle_v8i16_1z3zXz7z:
2713 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
2715 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8>
2716 ret <8 x i16> %shuffle
; Mask <1,undef,3,8,5,6,7,8>: the defined lanes match a 64-bit right shift by one
; 16-bit lane (zero shifted in at lanes 3 and 7); lane 1 is undef.
; The checks expect a single psrlq / vpsrlq by 16.
2719 define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) {
2720 ; SSE-LABEL: shuffle_v8i16_1X3z567z:
2722 ; SSE-NEXT: psrlq $16, %xmm0
2725 ; AVX-LABEL: shuffle_v8i16_1X3z567z:
2727 ; AVX-NEXT: vpsrlq $16, %xmm0, %xmm0
2729 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
2730 ret <8 x i16> %shuffle
; Mask <2,3,8,8,6,7,8,8>: in each 64-bit half the upper two lanes of %a move to
; the lower two lanes and the upper two lanes become zero (index 8).
; The checks expect a single 64-bit logical right shift by 32 (psrlq / vpsrlq).
2733 define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) {
2734 ; SSE-LABEL: shuffle_v8i16_23zz67zz:
2736 ; SSE-NEXT: psrlq $32, %xmm0
2739 ; AVX-LABEL: shuffle_v8i16_23zz67zz:
2741 ; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
2743 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8>
2744 ret <8 x i16> %shuffle
; Mask <3,8,undef,undef,undef,8,8,8>: lane 0 takes lane 3 of %a, lanes 1,5,6,7
; are zero (index 8) and lanes 2,3,4 are undef, so the pattern is compatible with
; a 64-bit right shift by three 16-bit lanes.
; The checks expect a single psrlq / vpsrlq by 48.
2747 define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) {
2748 ; SSE-LABEL: shuffle_v8i16_3zXXXzzz:
2750 ; SSE-NEXT: psrlq $48, %xmm0
2753 ; AVX-LABEL: shuffle_v8i16_3zXXXzzz:
2755 ; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
2757 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8>
2758 ret <8 x i16> %shuffle
; Mask <0,1,undef,3,8,8,undef,8>: the low four lanes keep %a in place (lane 2 is
; undef) and the high four lanes are zero (index 8) or undef.
; The checks expect a movq / vmovq that keeps the low 64 bits and zeroes the rest.
2761 define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
2762 ; SSE-LABEL: shuffle_v8i16_01u3zzuz:
2764 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2767 ; AVX-LABEL: shuffle_v8i16_01u3zzuz:
2769 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2771 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8>
2772 ret <8 x i16> %shuffle
; Mask <0,9,2,3,4,5,6,7>: every lane of %a stays in place except lane 1, which is
; taken from the zeroinitializer operand (index 9).
; The SSE2 and SSSE3 checks expect an andps with a constant-pool operand; the
; SSE4.1 and AVX checks expect a zeroed register blended in with pblendw / vpblendw.
2775 define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) {
2776 ; SSE2-LABEL: shuffle_v8i16_0z234567:
2778 ; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2781 ; SSSE3-LABEL: shuffle_v8i16_0z234567:
2783 ; SSSE3-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2786 ; SSE41-LABEL: shuffle_v8i16_0z234567:
2788 ; SSE41-NEXT: pxor %xmm1, %xmm1
2789 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2792 ; AVX-LABEL: shuffle_v8i16_0z234567:
2794 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2795 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2797 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2798 ret <8 x i16> %shuffle
; Mask <0,8,8,8,8,5,8,7>: lanes 0, 5 and 7 of %a stay in place; lanes 1-4 and 6
; are zero (index 8 selects the zeroinitializer operand).
; The SSE2 and SSSE3 checks expect an andps with a constant-pool operand; the
; SSE4.1 and AVX checks expect a zeroed register blended in with pblendw / vpblendw.
2801 define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) {
2802 ; SSE2-LABEL: shuffle_v8i16_0zzzz5z7:
2804 ; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2807 ; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7:
2809 ; SSSE3-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2812 ; SSE41-LABEL: shuffle_v8i16_0zzzz5z7:
2814 ; SSE41-NEXT: pxor %xmm1, %xmm1
2815 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2818 ; AVX-LABEL: shuffle_v8i16_0zzzz5z7:
2820 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2821 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2823 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7>
2824 ret <8 x i16> %shuffle
; Mask <0,9,2,3,4,5,6,15>: lanes 1 and 7 come from the zeroinitializer operand
; (indices 9 and 15); every other lane of %a stays in place.
; NOTE(review): the function name reads as if only lane 7 were zeroed, but the
; mask also zeroes lane 1 and the pblendw checks below (xmm1[1] and xmm1[7]) agree
; with the mask, not with the name - confirm which one was intended.
2827 define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
2828 ; SSE2-LABEL: shuffle_v8i16_0123456z:
2830 ; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2833 ; SSSE3-LABEL: shuffle_v8i16_0123456z:
2835 ; SSSE3-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2838 ; SSE41-LABEL: shuffle_v8i16_0123456z:
2840 ; SSE41-NEXT: pxor %xmm1, %xmm1
2841 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6],xmm1[7]
2844 ; AVX-LABEL: shuffle_v8i16_0123456z:
2846 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2847 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6],xmm1[7]
2849 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
2850 ret <8 x i16> %shuffle
; Two-input shuffle with mask <15,undef,3,undef,12,12,5,undef>: lane 0 is lane 7
; of %b, lane 2 is lane 3 of %a, lanes 4 and 5 are lane 4 of %b, lane 6 is lane 5
; of %a; lanes 1, 3 and 7 are undef.
; The SSE, AVX1 and AVX2/AVX512VL checks expect a byte shift, a pshufhw and a
; punpckhdq; the XOP checks expect a single vpperm.
2853 define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
2854 ; SSE-LABEL: shuffle_v8i16_fu3ucc5u:
2856 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2857 ; SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2858 ; SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2859 ; SSE-NEXT: movdqa %xmm1, %xmm0
2862 ; AVX1-LABEL: shuffle_v8i16_fu3ucc5u:
2864 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2865 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2866 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2869 ; AVX2OR512VL-LABEL: shuffle_v8i16_fu3ucc5u:
2870 ; AVX2OR512VL: # %bb.0:
2871 ; AVX2OR512VL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2872 ; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2873 ; AVX2OR512VL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2874 ; AVX2OR512VL-NEXT: retq
2876 ; XOP-LABEL: shuffle_v8i16_fu3ucc5u:
2878 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm1[14,15,10,11],xmm0[6,7,8,9],xmm1[8,9,8,9],xmm0[10,11,12,13]
2880 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
2881 ret <8 x i16> %shuffle
; Mask <8,0,1,2,3,4,5,undef>: lane 0 is zero (index 8), lanes 1-6 are lanes 0-5
; of %a moved up by one position, and lane 7 is undef.
; The checks expect a whole-register byte shift left by two bytes (pslldq / vpslldq).
2884 define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
2885 ; SSE-LABEL: shuffle_v8i16_8012345u:
2887 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2890 ; AVX-LABEL: shuffle_v8i16_8012345u:
2892 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2894 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef>
2896 ret <8 x i16> %shuffle
; Here the zero vector is the FIRST shuffle operand, so in mask
; <9,1,2,3,undef,undef,undef,undef> index 9 is lane 1 of %x and indices 1-3 are
; zero lanes; the upper four lanes are undef.
; The expected lowering depends on the subtarget: a dword broadcast followed by
; psrldq on SSE, AVX1 and the -SLOW configurations, a single vpshufb on the -FAST
; configurations (RUN lines with +fast-variable-perlane-shuffle), extrq on XOP.
2900 define <8 x i16> @shuffle_v8i16_9zzzuuuu(<8 x i16> %x) {
2901 ; SSE-LABEL: shuffle_v8i16_9zzzuuuu:
2903 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2904 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2907 ; AVX1-LABEL: shuffle_v8i16_9zzzuuuu:
2909 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2910 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2913 ; AVX2-SLOW-LABEL: shuffle_v8i16_9zzzuuuu:
2914 ; AVX2-SLOW: # %bb.0:
2915 ; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0
2916 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2917 ; AVX2-SLOW-NEXT: retq
2919 ; AVX2-FAST-LABEL: shuffle_v8i16_9zzzuuuu:
2920 ; AVX2-FAST: # %bb.0:
2921 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2922 ; AVX2-FAST-NEXT: retq
2924 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_9zzzuuuu:
2925 ; AVX512VL-SLOW: # %bb.0:
2926 ; AVX512VL-SLOW-NEXT: vbroadcastss %xmm0, %xmm0
2927 ; AVX512VL-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2928 ; AVX512VL-SLOW-NEXT: retq
2930 ; AVX512VL-FAST-LABEL: shuffle_v8i16_9zzzuuuu:
2931 ; AVX512VL-FAST: # %bb.0:
2932 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2933 ; AVX512VL-FAST-NEXT: retq
2935 ; XOP-LABEL: shuffle_v8i16_9zzzuuuu:
2937 ; XOP-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
2939 %r = shufflevector <8 x i16> zeroinitializer, <8 x i16> %x, <8 x i32> <i32 9, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; Mask <2,9,10,11,undef,undef,undef,undef>: lane 0 is lane 2 of %x, lanes 1-3 are
; zero (indices 9-11 select the zeroinitializer operand), the upper four lanes
; are undef.
; The expected lowering depends on the subtarget: a pslldq/psrldq byte-shift pair
; on SSE, AVX1 and the -SLOW configurations, a single vpshufb on the -FAST
; configurations (RUN lines with +fast-variable-perlane-shuffle), extrq on XOP.
2944 define <8 x i16> @shuffle_v8i16_2zzzuuuu(<8 x i16> %x) {
2945 ; SSE-LABEL: shuffle_v8i16_2zzzuuuu:
2947 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2948 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2951 ; AVX1-LABEL: shuffle_v8i16_2zzzuuuu:
2953 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2954 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2957 ; AVX2-SLOW-LABEL: shuffle_v8i16_2zzzuuuu:
2958 ; AVX2-SLOW: # %bb.0:
2959 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2960 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2961 ; AVX2-SLOW-NEXT: retq
2963 ; AVX2-FAST-LABEL: shuffle_v8i16_2zzzuuuu:
2964 ; AVX2-FAST: # %bb.0:
2965 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2966 ; AVX2-FAST-NEXT: retq
2968 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_2zzzuuuu:
2969 ; AVX512VL-SLOW: # %bb.0:
2970 ; AVX512VL-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2971 ; AVX512VL-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2972 ; AVX512VL-SLOW-NEXT: retq
2974 ; AVX512VL-FAST-LABEL: shuffle_v8i16_2zzzuuuu:
2975 ; AVX512VL-FAST: # %bb.0:
2976 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2977 ; AVX512VL-FAST-NEXT: retq
2979 ; XOP-LABEL: shuffle_v8i16_2zzzuuuu:
2981 ; XOP-NEXT: extrq {{.*#+}} xmm0 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
2983 %r = shufflevector <8 x i16> %x, <8 x i16> zeroinitializer, <8 x i32> <i32 2, i32 9, i32 10, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
; Mask <3,undef,undef,6,8,8,8,8>: lane 0 is lane 3 of %x, lane 3 is lane 6 of %x,
; lanes 1 and 2 are undef, and the upper four lanes are zero (index 8).
; The expected lowering is a pslldq/psrldq byte-shift pair everywhere (XOP
; included) except the -FAST configurations (RUN lines with
; +fast-variable-perlane-shuffle), which expect a single vpshufb.
2987 define <8 x i16> @shuffle_v8i16_3uu6zzzz(<8 x i16> %x) {
2988 ; SSE-LABEL: shuffle_v8i16_3uu6zzzz:
2990 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2991 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
2994 ; AVX1-LABEL: shuffle_v8i16_3uu6zzzz:
2996 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2997 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
3000 ; AVX2-SLOW-LABEL: shuffle_v8i16_3uu6zzzz:
3001 ; AVX2-SLOW: # %bb.0:
3002 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3003 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
3004 ; AVX2-SLOW-NEXT: retq
3006 ; AVX2-FAST-LABEL: shuffle_v8i16_3uu6zzzz:
3007 ; AVX2-FAST: # %bb.0:
3008 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13],zero,zero,zero,zero,zero,zero,zero,zero
3009 ; AVX2-FAST-NEXT: retq
3011 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_3uu6zzzz:
3012 ; AVX512VL-SLOW: # %bb.0:
3013 ; AVX512VL-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3014 ; AVX512VL-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
3015 ; AVX512VL-SLOW-NEXT: retq
3017 ; AVX512VL-FAST-LABEL: shuffle_v8i16_3uu6zzzz:
3018 ; AVX512VL-FAST: # %bb.0:
3019 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13],zero,zero,zero,zero,zero,zero,zero,zero
3020 ; AVX512VL-FAST-NEXT: retq
3022 ; XOP-LABEL: shuffle_v8i16_3uu6zzzz:
3024 ; XOP-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3025 ; XOP-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
3027 %r = shufflevector <8 x i16> %x, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 8, i32 8, i32 8, i32 8>
; A blend written as and/and/or on <2 x i64> instead of a shufflevector.
; %a is masked with <0, -4294967296>, keeping only the upper 32 bits of its high
; i64; %b is masked with <-1, 4294967295>, keeping everything else. The two masks
; are complementary, so the or is a bit-exact select between the inputs.
; The checks expect this to become a shuffle/blend: two shufps on SSE2 and SSSE3,
; a single blendps / vblendps on SSE4.1 and AVX.
3031 define <8 x i16> @mask_v8i16_012345ef(<8 x i16> %a, <8 x i16> %b) {
3032 ; SSE2-LABEL: mask_v8i16_012345ef:
3034 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
3035 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
3036 ; SSE2-NEXT: movaps %xmm1, %xmm0
3039 ; SSSE3-LABEL: mask_v8i16_012345ef:
3041 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
3042 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
3043 ; SSSE3-NEXT: movaps %xmm1, %xmm0
3046 ; SSE41-LABEL: mask_v8i16_012345ef:
3048 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
3051 ; AVX-LABEL: mask_v8i16_012345ef:
3053 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
3055 %1 = bitcast <8 x i16> %a to <2 x i64>
3056 %2 = bitcast <8 x i16> %b to <2 x i64>
3057 %3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
3058 %4 = and <2 x i64> %2, <i64 -1, i64 4294967295>
3059 %5 = or <2 x i64> %4, %3
3060 %6 = bitcast <2 x i64> %5 to <8 x i16>
; Loads an i32, inserts it into lane 0 of a zero <4 x i32>, reinterprets that as
; <8 x i16> and splats 16-bit lane 0 (all-zero shuffle mask).
; The SSE, AVX1 and XOPAVX1 checks expect movd + pshuflw + pshufd; the AVX2,
; AVX512VL and XOPAVX2 checks expect the load folded into vpbroadcastw (%rdi).
3064 define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
3065 ; SSE-LABEL: insert_dup_mem_v8i16_i32:
3067 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3068 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3069 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3072 ; AVX1-LABEL: insert_dup_mem_v8i16_i32:
3074 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3075 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3076 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3079 ; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_i32:
3080 ; AVX2OR512VL: # %bb.0:
3081 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %xmm0
3082 ; AVX2OR512VL-NEXT: retq
3084 ; XOPAVX1-LABEL: insert_dup_mem_v8i16_i32:
3086 ; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3087 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3088 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3089 ; XOPAVX1-NEXT: retq
3091 ; XOPAVX2-LABEL: insert_dup_mem_v8i16_i32:
3093 ; XOPAVX2-NEXT: vpbroadcastw (%rdi), %xmm0
3094 ; XOPAVX2-NEXT: retq
3095 %tmp = load i32, i32* %ptr, align 4
3096 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
3097 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
3098 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer
; Loads an i16, sign-extends it to i32, inserts it into lane 0 of a zero
; <4 x i32>, reinterprets that as <8 x i16> and splats 16-bit lane 0.
; Only the low 16 bits of the extended value are splatted, which is why the
; checks can use a zero-extending load (movzwl) on SSE/AVX1/XOPAVX1 and a direct
; vpbroadcastw (%rdi) on AVX2, AVX512VL and XOPAVX2.
3102 define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
3103 ; SSE-LABEL: insert_dup_mem_v8i16_sext_i16:
3105 ; SSE-NEXT: movzwl (%rdi), %eax
3106 ; SSE-NEXT: movd %eax, %xmm0
3107 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3108 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3111 ; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
3113 ; AVX1-NEXT: movzwl (%rdi), %eax
3114 ; AVX1-NEXT: vmovd %eax, %xmm0
3115 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3116 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3119 ; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
3120 ; AVX2OR512VL: # %bb.0:
3121 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %xmm0
3122 ; AVX2OR512VL-NEXT: retq
3124 ; XOPAVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
3126 ; XOPAVX1-NEXT: movzwl (%rdi), %eax
3127 ; XOPAVX1-NEXT: vmovd %eax, %xmm0
3128 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3129 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3130 ; XOPAVX1-NEXT: retq
3132 ; XOPAVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
3134 ; XOPAVX2-NEXT: vpbroadcastw (%rdi), %xmm0
3135 ; XOPAVX2-NEXT: retq
3136 %tmp = load i16, i16* %ptr, align 2
3137 %tmp1 = sext i16 %tmp to i32
3138 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
3139 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
3140 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer
; Loads an i32, inserts it into lane 0 of a zero <4 x i32>, reinterprets that as
; <8 x i16> and splats 16-bit lane 1, i.e. the upper half of the loaded i32.
; The SSE, AVX1 and XOPAVX1 checks expect movd + pshuflw + pshufd; the AVX2,
; AVX512VL and XOPAVX2 checks expect a vpbroadcastw from byte offset 2 of %ptr.
3144 define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
3145 ; SSE-LABEL: insert_dup_elt1_mem_v8i16_i32:
3147 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3148 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3149 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3152 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
3154 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3155 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3156 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3159 ; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v8i16_i32:
3160 ; AVX2OR512VL: # %bb.0:
3161 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %xmm0
3162 ; AVX2OR512VL-NEXT: retq
3164 ; XOPAVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
3166 ; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3167 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3168 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3169 ; XOPAVX1-NEXT: retq
3171 ; XOPAVX2-LABEL: insert_dup_elt1_mem_v8i16_i32:
3173 ; XOPAVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
3174 ; XOPAVX2-NEXT: retq
3175 %tmp = load i32, i32* %ptr, align 4
3176 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
3177 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
3178 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Loads an i32, inserts it into lane 1 of a zero <4 x i32>, reinterprets that as
; <8 x i16> and splats 16-bit lane 3, i.e. the upper half of the loaded i32.
; Because the splatted word is still the upper half of the scalar, the AVX2,
; AVX512VL and XOPAVX2 checks expect vpbroadcastw from byte offset 2 of %ptr;
; the SSE2 checks reload the scalar into dword 0 and splat word 1, the SSSE3 and
; SSE4.1 checks use pshufb, and AVX1/XOPAVX1 broadcast the dword first.
3182 define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
3183 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32:
3185 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3186 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3187 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3190 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:
3192 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3193 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
3196 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32:
3198 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3199 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
3202 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
3204 ; AVX1-NEXT: vbroadcastss (%rdi), %xmm0
3205 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
3206 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3209 ; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v8i16_i32:
3210 ; AVX2OR512VL: # %bb.0:
3211 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %xmm0
3212 ; AVX2OR512VL-NEXT: retq
3214 ; XOPAVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
3216 ; XOPAVX1-NEXT: vbroadcastss (%rdi), %xmm0
3217 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
3218 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3219 ; XOPAVX1-NEXT: retq
3221 ; XOPAVX2-LABEL: insert_dup_elt3_mem_v8i16_i32:
3223 ; XOPAVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
3224 ; XOPAVX2-NEXT: retq
3225 %tmp = load i32, i32* %ptr, align 4
3226 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
3227 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
3228 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Loads an i16, sign-extends it to i32, inserts it into lane 0 of a zero
; <4 x i32>, reinterprets that as <8 x i16> and splats 16-bit lane 1.
; Lane 1 is the upper half of the sign-extended value, which is not present in
; memory, so no check folds the load into a broadcast: every configuration keeps
; the sign-extending load (movswl), and the AVX2, AVX512VL and XOPAVX2 checks
; shift the extended value right by 16 before vpbroadcastw.
3232 define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
3233 ; SSE-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
3235 ; SSE-NEXT: movswl (%rdi), %eax
3236 ; SSE-NEXT: movd %eax, %xmm0
3237 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3238 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3241 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
3243 ; AVX1-NEXT: movswl (%rdi), %eax
3244 ; AVX1-NEXT: vmovd %eax, %xmm0
3245 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3246 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3249 ; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
3251 ; AVX2-NEXT: movswl (%rdi), %eax
3252 ; AVX2-NEXT: shrl $16, %eax
3253 ; AVX2-NEXT: vmovd %eax, %xmm0
3254 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
3257 ; AVX512VL-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
3258 ; AVX512VL: # %bb.0:
3259 ; AVX512VL-NEXT: movswl (%rdi), %eax
3260 ; AVX512VL-NEXT: shrl $16, %eax
3261 ; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
3262 ; AVX512VL-NEXT: retq
3264 ; XOPAVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
3266 ; XOPAVX1-NEXT: movswl (%rdi), %eax
3267 ; XOPAVX1-NEXT: vmovd %eax, %xmm0
3268 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3269 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3270 ; XOPAVX1-NEXT: retq
3272 ; XOPAVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
3274 ; XOPAVX2-NEXT: movswl (%rdi), %eax
3275 ; XOPAVX2-NEXT: shrl $16, %eax
3276 ; XOPAVX2-NEXT: vmovd %eax, %xmm0
3277 ; XOPAVX2-NEXT: vpbroadcastw %xmm0, %xmm0
3278 ; XOPAVX2-NEXT: retq
3279 %tmp = load i16, i16* %ptr, align 2
3280 %tmp1 = sext i16 %tmp to i32
3281 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
3282 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
3283 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Loads an i16, sign-extends it to i32, inserts it into lane 1 of a zero
; <4 x i32>, reinterprets that as <8 x i16> and splats 16-bit lane 3.
; Lane 3 is the upper half of the sign-extended value, so every configuration
; keeps the sign-extending load (movswl); the AVX2, AVX512VL and XOPAVX2 checks
; shift the extended value right by 16 before vpbroadcastw, while the other
; configurations move the scalar into dword 0 and splat word 1 from there.
3287 define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
3288 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
3290 ; SSE2-NEXT: movswl (%rdi), %eax
3291 ; SSE2-NEXT: movd %eax, %xmm0
3292 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3293 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3296 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
3298 ; SSSE3-NEXT: movswl (%rdi), %eax
3299 ; SSSE3-NEXT: movd %eax, %xmm0
3300 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
3303 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
3305 ; SSE41-NEXT: movswl (%rdi), %eax
3306 ; SSE41-NEXT: movd %eax, %xmm0
3307 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
3310 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
3312 ; AVX1-NEXT: movswl (%rdi), %eax
3313 ; AVX1-NEXT: vmovd %eax, %xmm0
3314 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
3317 ; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
3319 ; AVX2-NEXT: movswl (%rdi), %eax
3320 ; AVX2-NEXT: shrl $16, %eax
3321 ; AVX2-NEXT: vmovd %eax, %xmm0
3322 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
3325 ; AVX512VL-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
3326 ; AVX512VL: # %bb.0:
3327 ; AVX512VL-NEXT: movswl (%rdi), %eax
3328 ; AVX512VL-NEXT: shrl $16, %eax
3329 ; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
3330 ; AVX512VL-NEXT: retq
3332 ; XOPAVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
3334 ; XOPAVX1-NEXT: movswl (%rdi), %eax
3335 ; XOPAVX1-NEXT: vmovd %eax, %xmm0
3336 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
3337 ; XOPAVX1-NEXT: retq
3339 ; XOPAVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
3341 ; XOPAVX2-NEXT: movswl (%rdi), %eax
3342 ; XOPAVX2-NEXT: shrl $16, %eax
3343 ; XOPAVX2-NEXT: vmovd %eax, %xmm0
3344 ; XOPAVX2-NEXT: vpbroadcastw %xmm0, %xmm0
3345 ; XOPAVX2-NEXT: retq
3346 %tmp = load i16, i16* %ptr, align 2
3347 %tmp1 = sext i16 %tmp to i32
3348 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1
3349 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
3350 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Loads an i64 (align 4), inserts it into lane 0 of a zero <2 x i64>,
; reinterprets that as <8 x i16> and splats 16-bit lane 0 (all-zero mask).
; The SSE, AVX1 and XOPAVX1 checks expect movq + pshuflw + pshufd; the AVX2,
; AVX512VL and XOPAVX2 checks expect the load folded into vpbroadcastw (%rdi).
3354 define <8 x i16> @insert_dup_mem_v8i16_i64(i64* %ptr) {
3355 ; SSE-LABEL: insert_dup_mem_v8i16_i64:
3357 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3358 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3359 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3362 ; AVX1-LABEL: insert_dup_mem_v8i16_i64:
3364 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
3365 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3366 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3369 ; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_i64:
3370 ; AVX2OR512VL: # %bb.0:
3371 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %xmm0
3372 ; AVX2OR512VL-NEXT: retq
3374 ; XOPAVX1-LABEL: insert_dup_mem_v8i16_i64:
3376 ; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
3377 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3378 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3379 ; XOPAVX1-NEXT: retq
3381 ; XOPAVX2-LABEL: insert_dup_mem_v8i16_i64:
3383 ; XOPAVX2-NEXT: vpbroadcastw (%rdi), %xmm0
3384 ; XOPAVX2-NEXT: retq
3385 %tmp = load i64, i64* %ptr, align 4
3386 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
3387 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
3388 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer
; Loads an i64 (align 4), inserts it into lane 0 of a zero <2 x i64>,
; reinterprets that as <8 x i16> and splats 16-bit lane 1.
; The SSE, AVX1 and XOPAVX1 checks expect movq + pshuflw + pshufd; the AVX2,
; AVX512VL and XOPAVX2 checks expect a vpbroadcastw from byte offset 2 of %ptr.
3392 define <8 x i16> @insert_dup_elt1_mem_v8i16_i64(i64* %ptr) {
3393 ; SSE-LABEL: insert_dup_elt1_mem_v8i16_i64:
3395 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3396 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3397 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3400 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i64:
3402 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
3403 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3404 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3407 ; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v8i16_i64:
3408 ; AVX2OR512VL: # %bb.0:
3409 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %xmm0
3410 ; AVX2OR512VL-NEXT: retq
3412 ; XOPAVX1-LABEL: insert_dup_elt1_mem_v8i16_i64:
3414 ; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
3415 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3416 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3417 ; XOPAVX1-NEXT: retq
3419 ; XOPAVX2-LABEL: insert_dup_elt1_mem_v8i16_i64:
3421 ; XOPAVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
3422 ; XOPAVX2-NEXT: retq
3423 %tmp = load i64, i64* %ptr, align 4
3424 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
3425 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
3426 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Loads an i64 (align 4), inserts it into lane 0 of a zero <2 x i64>,
; reinterprets that as <8 x i16> and splats 16-bit lane 3, the top 16 bits of
; the loaded i64.
; The SSE, AVX1 and XOPAVX1 checks expect movq + pshuflw + pshufd; the AVX2,
; AVX512VL and XOPAVX2 checks expect a vpbroadcastw from byte offset 6 of %ptr.
3430 define <8 x i16> @insert_dup_elt3_mem_v8i16_i64(i64* %ptr) {
3431 ; SSE-LABEL: insert_dup_elt3_mem_v8i16_i64:
3433 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3434 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
3435 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3438 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i64:
3440 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
3441 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
3442 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3445 ; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v8i16_i64:
3446 ; AVX2OR512VL: # %bb.0:
3447 ; AVX2OR512VL-NEXT: vpbroadcastw 6(%rdi), %xmm0
3448 ; AVX2OR512VL-NEXT: retq
3450 ; XOPAVX1-LABEL: insert_dup_elt3_mem_v8i16_i64:
3452 ; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
3453 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
3454 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3455 ; XOPAVX1-NEXT: retq
3457 ; XOPAVX2-LABEL: insert_dup_elt3_mem_v8i16_i64:
3459 ; XOPAVX2-NEXT: vpbroadcastw 6(%rdi), %xmm0
3460 ; XOPAVX2-NEXT: retq
3461 %tmp = load i64, i64* %ptr, align 4
3462 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
3463 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
3464 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Loads an i64 (align 4), inserts it into lane 1 of a zero <2 x i64>,
; reinterprets that as <8 x i16> and splats 16-bit lane 7, the top 16 bits of
; the loaded i64.
; The AVX2, AVX512VL and XOPAVX2 checks expect a vpbroadcastw from byte offset 6
; of %ptr; the SSSE3 and SSE4.1 checks expect movq + pshufb; the SSE2 checks
; expect movq followed by pshufd/pshufhw/pshufd; AVX1 and XOPAVX1 start from a
; vmovddup of the loaded qword.
3468 define <8 x i16> @insert_dup_elt7_mem_v8i16_i64(i64* %ptr) {
3469 ; SSE2-LABEL: insert_dup_elt7_mem_v8i16_i64:
3471 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3472 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
3473 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
3474 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
3477 ; SSSE3-LABEL: insert_dup_elt7_mem_v8i16_i64:
3479 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3480 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
3483 ; SSE41-LABEL: insert_dup_elt7_mem_v8i16_i64:
3485 ; SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3486 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
3489 ; AVX1-LABEL: insert_dup_elt7_mem_v8i16_i64:
3491 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
3492 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
3493 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
3496 ; AVX2OR512VL-LABEL: insert_dup_elt7_mem_v8i16_i64:
3497 ; AVX2OR512VL: # %bb.0:
3498 ; AVX2OR512VL-NEXT: vpbroadcastw 6(%rdi), %xmm0
3499 ; AVX2OR512VL-NEXT: retq
3501 ; XOPAVX1-LABEL: insert_dup_elt7_mem_v8i16_i64:
3503 ; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
3504 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
3505 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
3506 ; XOPAVX1-NEXT: retq
3508 ; XOPAVX2-LABEL: insert_dup_elt7_mem_v8i16_i64:
3510 ; XOPAVX2-NEXT: vpbroadcastw 6(%rdi), %xmm0
3511 ; XOPAVX2-NEXT: retq
3512 %tmp = load i64, i64* %ptr, align 4
3513 %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 1
3514 %tmp2 = bitcast <2 x i64> %tmp1 to <8 x i16>
3515 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
3519 define <8 x i16> @insert_dup_mem_v8i16_sext_i16_i64(i16* %ptr) {
3520 ; SSE-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
3522 ; SSE-NEXT: movzwl (%rdi), %eax
3523 ; SSE-NEXT: movd %eax, %xmm0
3524 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3525 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3528 ; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
3530 ; AVX1-NEXT: movzwl (%rdi), %eax
3531 ; AVX1-NEXT: vmovd %eax, %xmm0
3532 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3533 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3536 ; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
3537 ; AVX2OR512VL: # %bb.0:
3538 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %xmm0
3539 ; AVX2OR512VL-NEXT: retq
3541 ; XOPAVX1-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
3543 ; XOPAVX1-NEXT: movzwl (%rdi), %eax
3544 ; XOPAVX1-NEXT: vmovd %eax, %xmm0
3545 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3546 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3547 ; XOPAVX1-NEXT: retq
3549 ; XOPAVX2-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
3551 ; XOPAVX2-NEXT: vpbroadcastw (%rdi), %xmm0
3552 ; XOPAVX2-NEXT: retq
3553 %tmp = load i16, i16* %ptr, align 2
3554 %tmp1 = sext i16 %tmp to i64
3555 %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %tmp1, i32 0
3556 %tmp3 = bitcast <2 x i64> %tmp2 to <8 x i16>
3557 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer