1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-SLOW
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-SLOW
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST
; Single-input shuffle of %a with mask <0,1,0,1,2,3,2,3> (%b is never indexed).
; Adjacent i16 pairs move together, so the checks below expect one 32-bit
; element shuffle [0,0,1,1] (pshufd, or vpermilps on the AVX configurations).
11 define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
12 ; SSE-LABEL: shuffle_v8i16_01012323:
14 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
17 ; AVX-LABEL: shuffle_v8i16_01012323:
19 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
21 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
22 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <6,7,4,5,2,3,0,1> (%b is never indexed).
; The i16 pairs are reversed as whole 32-bit elements, so the checks below
; expect one dword shuffle [3,2,1,0] (pshufd / vpermilps).
24 define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
25 ; SSE-LABEL: shuffle_v8i16_67452301:
27 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
30 ; AVX-LABEL: shuffle_v8i16_67452301:
32 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
34 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
35 ret <8 x i16> %shuffle
; Two-input shuffle with mask <4,5,6,7,8,9,10,11>: the upper four i16 of %a
; followed by the lower four i16 of %b. The checks below expect shufpd on plain
; SSE2 and a byte-align (palignr / vpalignr) on the SSSE3, SSE4.1 and AVX
; configurations.
37 define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
38 ; SSE2-LABEL: shuffle_v8i16_456789AB:
40 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
43 ; SSSE3-LABEL: shuffle_v8i16_456789AB:
45 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
46 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
49 ; SSE41-LABEL: shuffle_v8i16_456789AB:
51 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
52 ; SSE41-NEXT: movdqa %xmm1, %xmm0
55 ; AVX-LABEL: shuffle_v8i16_456789AB:
57 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
59 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
60 ret <8 x i16> %shuffle
; Splat of element 0 of %a into all eight result lanes (%b is never indexed).
; The checks below expect pshuflw + pshufd for the SSE and AVX1 configurations
; and a single vpbroadcastw for the AVX2 / AVX512VL configurations.
63 define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
64 ; SSE-LABEL: shuffle_v8i16_00000000:
66 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
67 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
70 ; AVX1-LABEL: shuffle_v8i16_00000000:
72 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
73 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
76 ; AVX2OR512VL-LABEL: shuffle_v8i16_00000000:
77 ; AVX2OR512VL: # %bb.0:
78 ; AVX2OR512VL-NEXT: vpbroadcastw %xmm0, %xmm0
79 ; AVX2OR512VL-NEXT: retq
80 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
81 ret <8 x i16> %shuffle
; Mask <0,0,0,0,4,4,4,4>: element 0 of %a splatted across the low half and
; element 4 across the high half (%b is never indexed). The checks below expect
; pshuflw + pshufhw, or a single vpshufb for the *-FAST prefixes (the
; configurations built with +fast-variable-shuffle).
83 define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
84 ; SSE-LABEL: shuffle_v8i16_00004444:
86 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
87 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
90 ; AVX1-LABEL: shuffle_v8i16_00004444:
92 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
93 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
96 ; AVX2-SLOW-LABEL: shuffle_v8i16_00004444:
98 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
99 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
100 ; AVX2-SLOW-NEXT: retq
102 ; AVX2-FAST-LABEL: shuffle_v8i16_00004444:
103 ; AVX2-FAST: # %bb.0:
104 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9]
105 ; AVX2-FAST-NEXT: retq
107 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_00004444:
108 ; AVX512VL-SLOW: # %bb.0:
109 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
110 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
111 ; AVX512VL-SLOW-NEXT: retq
113 ; AVX512VL-FAST-LABEL: shuffle_v8i16_00004444:
114 ; AVX512VL-FAST: # %bb.0:
115 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9]
116 ; AVX512VL-FAST-NEXT: retq
117 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
118 ret <8 x i16> %shuffle
; Mask <u,0,u,1,u,2,u,3>: even result lanes are undef, odd lanes take elements
; 0..3 of %a. The checks below expect the undef lanes to be filled so that a
; single self-unpack of the low words (punpcklwd / vpunpcklwd) suffices.
120 define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) {
121 ; SSE-LABEL: shuffle_v8i16_u0u1u2u3:
123 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
126 ; AVX-LABEL: shuffle_v8i16_u0u1u2u3:
128 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
130 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
131 ret <8 x i16> %shuffle
; Mask <u,4,u,5,u,6,u,7>: even result lanes are undef, odd lanes take elements
; 4..7 of %a. The checks below expect the undef lanes to be filled so that a
; single self-unpack of the high words (punpckhwd / vpunpckhwd) suffices.
133 define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) {
134 ; SSE-LABEL: shuffle_v8i16_u4u5u6u7:
136 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
139 ; AVX-LABEL: shuffle_v8i16_u4u5u6u7:
141 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
143 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
144 ret <8 x i16> %shuffle
; Mask <3,1,2,0,6,7,4,5>: exchanges elements 0 and 3 in the low half of %a and
; swaps the two i16 pairs of the high half (%b is never indexed). The checks
; below expect pshuflw + pshufd, or a single vpshufb for the *-FAST prefixes.
146 define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
147 ; SSE-LABEL: shuffle_v8i16_31206745:
149 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
150 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
153 ; AVX1-LABEL: shuffle_v8i16_31206745:
155 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
156 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
159 ; AVX2-SLOW-LABEL: shuffle_v8i16_31206745:
160 ; AVX2-SLOW: # %bb.0:
161 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
162 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
163 ; AVX2-SLOW-NEXT: retq
165 ; AVX2-FAST-LABEL: shuffle_v8i16_31206745:
166 ; AVX2-FAST: # %bb.0:
167 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
168 ; AVX2-FAST-NEXT: retq
170 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_31206745:
171 ; AVX512VL-SLOW: # %bb.0:
172 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
173 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
174 ; AVX512VL-SLOW-NEXT: retq
176 ; AVX512VL-FAST-LABEL: shuffle_v8i16_31206745:
177 ; AVX512VL-FAST: # %bb.0:
178 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
179 ; AVX512VL-FAST-NEXT: retq
180 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
181 ret <8 x i16> %shuffle
; Mask <4,4,4,4,0,0,0,0>: element 4 of %a splatted across the low half and
; element 0 across the high half (%b is never indexed). The checks below expect
; a pshufd + pshuflw + pshufhw sequence on plain SSE2 and a single
; pshufb / vpshufb on the other configurations.
183 define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
184 ; SSE2-LABEL: shuffle_v8i16_44440000:
186 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
187 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
188 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
191 ; SSSE3-LABEL: shuffle_v8i16_44440000:
193 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
196 ; SSE41-LABEL: shuffle_v8i16_44440000:
198 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
201 ; AVX-LABEL: shuffle_v8i16_44440000:
203 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
205 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
206 ret <8 x i16> %shuffle
; Mask <2,3,0,1,6,7,4,5>: swaps the two i16 pairs inside each 64-bit half of %a
; (%b is never indexed). The checks below expect one dword shuffle [1,0,3,2]
; (pshufd / vpermilps).
208 define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) {
209 ; SSE-LABEL: shuffle_v8i16_23016745:
211 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
214 ; AVX-LABEL: shuffle_v8i16_23016745:
216 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,3,2]
218 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
219 ret <8 x i16> %shuffle
; Mask <2,3,0,2,6,7,4,5>: a pair swap within each half of %a, except that lane
; 3 takes element 2, so the low half needs a word-granular shuffle. The checks
; below expect pshuflw + pshufd, or a single vpshufb for the *-FAST prefixes.
221 define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
222 ; SSE-LABEL: shuffle_v8i16_23026745:
224 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
225 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
228 ; AVX1-LABEL: shuffle_v8i16_23026745:
230 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
231 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
234 ; AVX2-SLOW-LABEL: shuffle_v8i16_23026745:
235 ; AVX2-SLOW: # %bb.0:
236 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
237 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
238 ; AVX2-SLOW-NEXT: retq
240 ; AVX2-FAST-LABEL: shuffle_v8i16_23026745:
241 ; AVX2-FAST: # %bb.0:
242 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
243 ; AVX2-FAST-NEXT: retq
245 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_23026745:
246 ; AVX512VL-SLOW: # %bb.0:
247 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
248 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
249 ; AVX512VL-SLOW-NEXT: retq
251 ; AVX512VL-FAST-LABEL: shuffle_v8i16_23026745:
252 ; AVX512VL-FAST: # %bb.0:
253 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
254 ; AVX512VL-FAST-NEXT: retq
255 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
256 ret <8 x i16> %shuffle
; Mask <2,3,0,1,6,7,4,7>: pair swap in the low half of %a; the high half is
; <6,7,4,7>, which needs a word-granular shuffle. The checks below expect
; pshufd + pshufhw, or a single vpshufb for the *-FAST prefixes.
258 define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
259 ; SSE-LABEL: shuffle_v8i16_23016747:
261 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
262 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
265 ; AVX1-LABEL: shuffle_v8i16_23016747:
267 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
268 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
271 ; AVX2-SLOW-LABEL: shuffle_v8i16_23016747:
272 ; AVX2-SLOW: # %bb.0:
273 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
274 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
275 ; AVX2-SLOW-NEXT: retq
277 ; AVX2-FAST-LABEL: shuffle_v8i16_23016747:
278 ; AVX2-FAST: # %bb.0:
279 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15]
280 ; AVX2-FAST-NEXT: retq
282 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_23016747:
283 ; AVX512VL-SLOW: # %bb.0:
284 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
285 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
286 ; AVX512VL-SLOW-NEXT: retq
288 ; AVX512VL-FAST-LABEL: shuffle_v8i16_23016747:
289 ; AVX512VL-FAST: # %bb.0:
290 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15]
291 ; AVX512VL-FAST-NEXT: retq
292 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
293 ret <8 x i16> %shuffle
; Mask <7,5,6,4,3,1,2,0>: every result lane takes its element from the opposite
; 64-bit half of %a (%b is never indexed). The checks below expect
; pshufd + pshuflw + pshufhw on plain SSE2 and a single pshufb / vpshufb on the
; other configurations.
295 define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
296 ; SSE2-LABEL: shuffle_v8i16_75643120:
298 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
299 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
300 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
303 ; SSSE3-LABEL: shuffle_v8i16_75643120:
305 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
308 ; SSE41-LABEL: shuffle_v8i16_75643120:
310 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
313 ; AVX-LABEL: shuffle_v8i16_75643120:
315 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
317 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
318 ret <8 x i16> %shuffle
; Mask <1,0,5,4,5,4,1,0>: only elements 0, 1, 4 and 5 of %a are used, and each
; result half draws from both source halves. The checks below expect
; pshufd + pshuflw + pshufhw on plain SSE2 and a single pshufb / vpshufb on the
; other configurations.
321 define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
322 ; SSE2-LABEL: shuffle_v8i16_10545410:
324 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
325 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
326 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
329 ; SSSE3-LABEL: shuffle_v8i16_10545410:
331 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
334 ; SSE41-LABEL: shuffle_v8i16_10545410:
336 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
339 ; AVX-LABEL: shuffle_v8i16_10545410:
341 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
343 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
344 ret <8 x i16> %shuffle
; Mask <5,4,1,0,5,4,1,0>: the four-lane pattern <5,4,1,0> of %a repeated in both
; result halves (%b is never indexed). The checks below expect
; pshufd + pshuflw + pshufhw on plain SSE2 and a single pshufb / vpshufb on the
; other configurations.
346 define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
347 ; SSE2-LABEL: shuffle_v8i16_54105410:
349 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
350 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
351 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
354 ; SSSE3-LABEL: shuffle_v8i16_54105410:
356 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
359 ; SSE41-LABEL: shuffle_v8i16_54105410:
361 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
364 ; AVX-LABEL: shuffle_v8i16_54105410:
366 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
368 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
369 ret <8 x i16> %shuffle
; Mask <5,4,1,0,1,0,5,4>: the high result half is the low result half with its
; two i16 pairs exchanged; only elements 0, 1, 4 and 5 of %a are used. The
; checks below expect pshufd + pshuflw + pshufhw on plain SSE2 and a single
; pshufb / vpshufb on the other configurations.
371 define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
372 ; SSE2-LABEL: shuffle_v8i16_54101054:
374 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
375 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
376 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
379 ; SSSE3-LABEL: shuffle_v8i16_54101054:
381 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
384 ; SSE41-LABEL: shuffle_v8i16_54101054:
386 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
389 ; AVX-LABEL: shuffle_v8i16_54101054:
391 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
393 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
394 ret <8 x i16> %shuffle
; Mask <0,4,4,0,0,4,4,0>: only elements 0 and 4 of %a are used, with the
; pattern <0,4,4,0> repeated in both result halves. The checks below expect
; pshufd + pshuflw + pshufhw on plain SSE2 and a single pshufb / vpshufb on the
; other configurations.
396 define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
397 ; SSE2-LABEL: shuffle_v8i16_04400440:
399 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
400 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
401 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
404 ; SSSE3-LABEL: shuffle_v8i16_04400440:
406 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
409 ; SSE41-LABEL: shuffle_v8i16_04400440:
411 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
414 ; AVX-LABEL: shuffle_v8i16_04400440:
416 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
418 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
419 ret <8 x i16> %shuffle
; Mask <4,0,0,4,4,0,0,4>: only elements 0 and 4 of %a are used, with the
; pattern <4,0,0,4> repeated in both result halves. The checks below expect
; pshufd + pshuflw + pshufhw on plain SSE2 and a single pshufb / vpshufb on the
; other configurations.
421 define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
422 ; SSE2-LABEL: shuffle_v8i16_40044004:
424 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
425 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
426 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
429 ; SSSE3-LABEL: shuffle_v8i16_40044004:
431 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
434 ; SSE41-LABEL: shuffle_v8i16_40044004:
436 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
439 ; AVX-LABEL: shuffle_v8i16_40044004:
441 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
443 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
444 ret <8 x i16> %shuffle
; Mask <2,6,4,0,5,1,7,3>: a permutation of all eight elements of %a in which
; both result halves mix elements from both source halves. The plain SSE2
; checks below list five word/dword shuffles; the other configurations expect a
; single pshufb / vpshufb.
447 define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
448 ; SSE2-LABEL: shuffle_v8i16_26405173:
450 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
451 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
452 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
453 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
454 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
457 ; SSSE3-LABEL: shuffle_v8i16_26405173:
459 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
462 ; SSE41-LABEL: shuffle_v8i16_26405173:
464 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
467 ; AVX-LABEL: shuffle_v8i16_26405173:
469 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
471 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
472 ret <8 x i16> %shuffle
; Mask <2,0,6,4,5,1,7,3>: a permutation of all eight elements of %a in which
; both result halves mix elements from both source halves. The plain SSE2
; checks below list five word/dword shuffles; the other configurations expect a
; single pshufb / vpshufb.
474 define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
475 ; SSE2-LABEL: shuffle_v8i16_20645173:
477 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
478 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
479 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
480 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
481 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
484 ; SSSE3-LABEL: shuffle_v8i16_20645173:
486 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
489 ; SSE41-LABEL: shuffle_v8i16_20645173:
491 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
494 ; AVX-LABEL: shuffle_v8i16_20645173:
496 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
498 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
499 ret <8 x i16> %shuffle
; Mask <2,6,4,0,1,3,7,5>: a permutation of all eight elements of %a in which
; both result halves mix elements from both source halves. The plain SSE2
; checks below list four word/dword shuffles; the other configurations expect a
; single pshufb / vpshufb.
501 define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
502 ; SSE2-LABEL: shuffle_v8i16_26401375:
504 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
505 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
506 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
507 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
510 ; SSSE3-LABEL: shuffle_v8i16_26401375:
512 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
515 ; SSE41-LABEL: shuffle_v8i16_26401375:
517 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
520 ; AVX-LABEL: shuffle_v8i16_26401375:
522 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
524 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
525 ret <8 x i16> %shuffle
; Mask <6,6,7,5,1,6,4,3>: element 6 of %a is used three times while elements 0
; and 2 are unused (%b is never indexed). The plain SSE2 checks below list five
; word/dword shuffles; the other configurations expect a single pshufb / vpshufb.
528 define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) {
529 ; SSE2-LABEL: shuffle_v8i16_66751643:
531 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
532 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
533 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
534 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
535 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
538 ; SSSE3-LABEL: shuffle_v8i16_66751643:
540 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
543 ; SSE41-LABEL: shuffle_v8i16_66751643:
545 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
548 ; AVX-LABEL: shuffle_v8i16_66751643:
550 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
552 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3>
553 ret <8 x i16> %shuffle
; Mask <6,0,5,1,4,7,5,4> applied to %a; unlike its neighbours this test passes
; a literal undef as the second shufflevector operand, so %b is unused. The
; plain SSE2 checks below list four word/dword shuffles; the other
; configurations expect a single pshufb / vpshufb.
556 define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) {
557 ; SSE2-LABEL: shuffle_v8i16_60514754:
559 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
560 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
561 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
562 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
565 ; SSSE3-LABEL: shuffle_v8i16_60514754:
567 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
570 ; SSE41-LABEL: shuffle_v8i16_60514754:
572 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
575 ; AVX-LABEL: shuffle_v8i16_60514754:
577 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
579 %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4>
580 ret <8 x i16> %shuffle
; Mask <0,0,4,4,4,4,4,4>: element 0 of %a in lanes 0-1 and element 4 in the
; remaining six lanes (%b is never indexed). The checks below expect
; pshufd + pshuflw + pshufhw on plain SSE2 and a single pshufb / vpshufb on the
; other configurations.
583 define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
584 ; SSE2-LABEL: shuffle_v8i16_00444444:
586 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
587 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
588 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
591 ; SSSE3-LABEL: shuffle_v8i16_00444444:
593 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
596 ; SSE41-LABEL: shuffle_v8i16_00444444:
598 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
601 ; AVX-LABEL: shuffle_v8i16_00444444:
603 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
605 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
606 ret <8 x i16> %shuffle
; Mask <4,4,0,0,4,4,4,4>: element 0 of %a in lanes 2-3 and element 4 in the
; remaining six lanes (%b is never indexed). The checks below expect
; pshufd + pshuflw + pshufhw on plain SSE2 and a single pshufb / vpshufb on the
; other configurations.
608 define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
609 ; SSE2-LABEL: shuffle_v8i16_44004444:
611 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
612 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
613 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
616 ; SSSE3-LABEL: shuffle_v8i16_44004444:
618 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
621 ; SSE41-LABEL: shuffle_v8i16_44004444:
623 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
626 ; AVX-LABEL: shuffle_v8i16_44004444:
628 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
630 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
631 ret <8 x i16> %shuffle
; Mask <0,4,4,0,4,4,4,4>: element 0 of %a in lanes 0 and 3, element 4 in every
; other lane (%b is never indexed). The checks below expect
; pshufd + pshuflw + pshufhw on plain SSE2 and a single pshufb / vpshufb on the
; other configurations.
633 define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
634 ; SSE2-LABEL: shuffle_v8i16_04404444:
636 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
637 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
638 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
641 ; SSSE3-LABEL: shuffle_v8i16_04404444:
643 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
646 ; SSE41-LABEL: shuffle_v8i16_04404444:
648 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
651 ; AVX-LABEL: shuffle_v8i16_04404444:
653 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
655 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
656 ret <8 x i16> %shuffle
; Mask <0,4,4,0,0,0,0,0>: element 4 of %a in lanes 1 and 2, element 0 in every
; other lane (%b is never indexed). The checks below expect
; pshufd + pshuflw + pshufhw on plain SSE2 and a single pshufb / vpshufb on the
; other configurations.
658 define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
659 ; SSE2-LABEL: shuffle_v8i16_04400000:
661 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
662 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
663 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
666 ; SSSE3-LABEL: shuffle_v8i16_04400000:
668 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
671 ; SSE41-LABEL: shuffle_v8i16_04400000:
673 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
676 ; AVX-LABEL: shuffle_v8i16_04400000:
678 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
680 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
681 ret <8 x i16> %shuffle
; Mask <0,4,4,0,4,5,6,7>: the low result half is <0,4,4,0> of %a while the high
; half is passed through unchanged (%b is never indexed). The checks below
; expect pshufd + pshuflw, or a single vpshufb for the *-FAST prefixes.
683 define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
684 ; SSE-LABEL: shuffle_v8i16_04404567:
686 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
687 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
690 ; AVX1-LABEL: shuffle_v8i16_04404567:
692 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
693 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
696 ; AVX2-SLOW-LABEL: shuffle_v8i16_04404567:
697 ; AVX2-SLOW: # %bb.0:
698 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
699 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
700 ; AVX2-SLOW-NEXT: retq
702 ; AVX2-FAST-LABEL: shuffle_v8i16_04404567:
703 ; AVX2-FAST: # %bb.0:
704 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
705 ; AVX2-FAST-NEXT: retq
707 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_04404567:
708 ; AVX512VL-SLOW: # %bb.0:
709 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
710 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
711 ; AVX512VL-SLOW-NEXT: retq
713 ; AVX512VL-FAST-LABEL: shuffle_v8i16_04404567:
714 ; AVX512VL-FAST: # %bb.0:
715 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
716 ; AVX512VL-FAST-NEXT: retq
717 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
718 ret <8 x i16> %shuffle
; Mask <0,undef,4,4,4,4,4,4>: lane 0 takes element 0 of %a, lane 1 is undef,
; and the remaining lanes take element 4. The visible checks resolve the undef
; lane as element 1 (left in place). They expect pshufd + pshuflw + pshufhw on
; plain SSE2 and a single pshufb / vpshufb on the other configurations.
721 define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
722 ; SSE2-LABEL: shuffle_v8i16_0X444444:
724 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
725 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
726 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
729 ; SSSE3-LABEL: shuffle_v8i16_0X444444:
731 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
734 ; SSE41-LABEL: shuffle_v8i16_0X444444:
736 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
739 ; AVX-LABEL: shuffle_v8i16_0X444444:
741 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
743 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
744 ret <8 x i16> %shuffle
; Mask <4,4,undef,0,4,4,4,4>: lane 3 takes element 0 of %a, lane 2 is undef,
; and the remaining lanes take element 4. The visible checks resolve the undef
; lane as element 4. They expect pshufd + pshuflw + pshufhw on plain SSE2 and a
; single pshufb / vpshufb on the other configurations.
746 define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
747 ; SSE2-LABEL: shuffle_v8i16_44X04444:
749 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
750 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
751 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
754 ; SSSE3-LABEL: shuffle_v8i16_44X04444:
756 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
759 ; SSE41-LABEL: shuffle_v8i16_44X04444:
761 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
764 ; AVX-LABEL: shuffle_v8i16_44X04444:
766 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
768 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
769 ret <8 x i16> %shuffle
; Mask <undef,4,4,0,4,4,4,4>: lane 0 is undef, lane 3 takes element 0 of %a,
; and the remaining lanes take element 4. The visible checks resolve the undef
; lane as element 0. They expect pshufd + pshuflw + pshufhw on plain SSE2 and a
; single pshufb / vpshufb on the other configurations.
771 define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
772 ; SSE2-LABEL: shuffle_v8i16_X4404444:
774 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
775 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
776 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
779 ; SSSE3-LABEL: shuffle_v8i16_X4404444:
781 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
784 ; SSE41-LABEL: shuffle_v8i16_X4404444:
786 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
789 ; AVX-LABEL: shuffle_v8i16_X4404444:
791 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
793 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
794 ret <8 x i16> %shuffle
; Mask <0,1,2,7,u,u,u,u>: the low result half is elements 0, 1, 2 and 7 of %a;
; the high half is undef. The plain SSE2 checks below list a
; pshufd + pshufhw + pshufd sequence; the other configurations expect a single
; pshufb / vpshufb.
797 define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
798 ; SSE2-LABEL: shuffle_v8i16_0127XXXX:
800 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
801 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
802 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
805 ; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
807 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
810 ; SSE41-LABEL: shuffle_v8i16_0127XXXX:
812 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
815 ; AVX-LABEL: shuffle_v8i16_0127XXXX:
817 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
819 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
820 ret <8 x i16> %shuffle
; Mask <u,u,u,u,4,5,6,3>: the low result half is undef; the high half is
; elements 4, 5, 6 and 3 of %a. The plain SSE2 checks below list a
; pshufd + pshuflw + pshufd sequence; the other configurations expect a single
; pshufb / vpshufb.
823 define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
824 ; SSE2-LABEL: shuffle_v8i16_XXXX4563:
826 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
827 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
828 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
831 ; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
833 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
836 ; SSE41-LABEL: shuffle_v8i16_XXXX4563:
838 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
841 ; AVX-LABEL: shuffle_v8i16_XXXX4563:
843 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
845 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
846 ret <8 x i16> %shuffle
; Mask <4,5,6,3,u,u,u,u>: the low result half is elements 4, 5, 6 and 3 of %a;
; the high half is undef. The plain SSE2 checks below list a
; pshufd + pshuflw + pshufd sequence; the other configurations expect a single
; pshufb / vpshufb.
849 define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
850 ; SSE2-LABEL: shuffle_v8i16_4563XXXX:
852 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
853 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
854 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
857 ; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
859 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
862 ; SSE41-LABEL: shuffle_v8i16_4563XXXX:
864 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
867 ; AVX-LABEL: shuffle_v8i16_4563XXXX:
869 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
871 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
872 ret <8 x i16> %shuffle
; Mask <0,1,2,7,4,5,6,3>: the identity on %a except that lanes 3 and 7 exchange
; their elements (%b is never indexed). The plain SSE2 checks below list a
; pshufd + pshufhw + pshufd sequence; the other configurations expect a single
; pshufb / vpshufb.
875 define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
876 ; SSE2-LABEL: shuffle_v8i16_01274563:
878 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
879 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
880 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
883 ; SSSE3-LABEL: shuffle_v8i16_01274563:
885 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
888 ; SSE41-LABEL: shuffle_v8i16_01274563:
890 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
893 ; AVX-LABEL: shuffle_v8i16_01274563:
895 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
897 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
898 ret <8 x i16> %shuffle
; Mask <4,5,6,3,0,1,2,7>: the low result half is elements 4, 5, 6 and 3 of %a
; and the high half is elements 0, 1, 2 and 7 (%b is never indexed). The plain
; SSE2 checks below list a pshufd + pshuflw + pshufd sequence; the other
; configurations expect a single pshufb / vpshufb.
901 define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
902 ; SSE2-LABEL: shuffle_v8i16_45630127:
904 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
905 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
906 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
909 ; SSSE3-LABEL: shuffle_v8i16_45630127:
911 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
914 ; SSE41-LABEL: shuffle_v8i16_45630127:
916 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
919 ; AVX-LABEL: shuffle_v8i16_45630127:
921 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
923 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
924 ret <8 x i16> %shuffle
; Single-input shuffle of %a with an irregular mask (3,7,1,0,2,7,3,5) in which
; source lanes 3 and 7 are each used twice.
927 define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
928 ; SSE2-LABEL: shuffle_v8i16_37102735:
930 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
931 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
932 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
933 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
934 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
935 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
938 ; SSSE3-LABEL: shuffle_v8i16_37102735:
940 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
943 ; SSE41-LABEL: shuffle_v8i16_37102735:
945 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
948 ; AVX-LABEL: shuffle_v8i16_37102735:
950 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
952 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
953 ret <8 x i16> %shuffle
; Interleaves the low four lanes of %a and %b, %a first: a0,b0,a1,b1,a2,b2,a3,b3.
956 define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
957 ; SSE-LABEL: shuffle_v8i16_08192a3b:
959 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
962 ; AVX-LABEL: shuffle_v8i16_08192a3b:
964 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
966 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
967 ret <8 x i16> %shuffle
; Interleaves the low four lanes of %a with the high four lanes of %b:
; a0,b4,a1,b5,a2,b6,a3,b7.
970 define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
971 ; SSE-LABEL: shuffle_v8i16_0c1d2e3f:
973 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
974 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
977 ; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
979 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
980 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
982 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
983 ret <8 x i16> %shuffle
; Interleaves the high four lanes of %a and %b, %a first: a4,b4,a5,b5,a6,b6,a7,b7.
986 define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
987 ; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
989 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
992 ; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
994 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
996 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
997 ret <8 x i16> %shuffle
; Interleaves the high four lanes of %a with the low four lanes of %b:
; a4,b0,a5,b1,a6,b2,a7,b3.
1000 define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
1001 ; SSE-LABEL: shuffle_v8i16_48596a7b:
1003 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1004 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1007 ; AVX-LABEL: shuffle_v8i16_48596a7b:
1009 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1010 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1012 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
1013 ret <8 x i16> %shuffle
; Interleaves lanes 0,1,6,7 of %a with the same-numbered lanes of %b:
; a0,b0,a1,b1,a6,b6,a7,b7.
1016 define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
1017 ; SSE-LABEL: shuffle_v8i16_08196e7f:
1019 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
1020 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1021 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1024 ; AVX-LABEL: shuffle_v8i16_08196e7f:
1026 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
1027 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1028 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1030 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
1031 ret <8 x i16> %shuffle
; Interleaves lanes 0,1,6,7 of %a with lanes 4,5,0,1 of %b:
; a0,b4,a1,b5,a6,b0,a7,b1.
1034 define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
1035 ; SSE-LABEL: shuffle_v8i16_0c1d6879:
1037 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
1038 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1039 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1042 ; AVX-LABEL: shuffle_v8i16_0c1d6879:
1044 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
1045 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1046 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1048 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
1049 ret <8 x i16> %shuffle
; Two-input shuffle producing a1,a0,b1,b0,a3,a2,b3,b2. The expectations differ
; between the run lines with and without +fast-variable-shuffle: after the
; unpack, the former expect one byte shuffle, the latter a low/high word
; shuffle pair.
1052 define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
1053 ; SSE-LABEL: shuffle_v8i16_109832ba:
1055 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1056 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1057 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1060 ; AVX1-LABEL: shuffle_v8i16_109832ba:
1062 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1063 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1064 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1067 ; AVX2-SLOW-LABEL: shuffle_v8i16_109832ba:
1068 ; AVX2-SLOW: # %bb.0:
1069 ; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1070 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1071 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1072 ; AVX2-SLOW-NEXT: retq
1074 ; AVX2-FAST-LABEL: shuffle_v8i16_109832ba:
1075 ; AVX2-FAST: # %bb.0:
1076 ; AVX2-FAST-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1077 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11]
1078 ; AVX2-FAST-NEXT: retq
1080 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_109832ba:
1081 ; AVX512VL-SLOW: # %bb.0:
1082 ; AVX512VL-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1083 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1084 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1085 ; AVX512VL-SLOW-NEXT: retq
1087 ; AVX512VL-FAST-LABEL: shuffle_v8i16_109832ba:
1088 ; AVX512VL-FAST: # %bb.0:
1089 ; AVX512VL-FAST-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1090 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11]
1091 ; AVX512VL-FAST-NEXT: retq
1092 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
1093 ret <8 x i16> %shuffle
; Interleaves the low four lanes of %b and %a, %b first: b0,a0,b1,a1,b2,a2,b3,a3.
1096 define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) {
1097 ; SSE-LABEL: shuffle_v8i16_8091a2b3:
1099 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1100 ; SSE-NEXT: movdqa %xmm1, %xmm0
1103 ; AVX-LABEL: shuffle_v8i16_8091a2b3:
1105 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1107 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
1108 ret <8 x i16> %shuffle
; Interleaves the high four lanes of %b and %a, %b first: b4,a4,b5,a5,b6,a6,b7,a7.
1110 define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
1111 ; SSE-LABEL: shuffle_v8i16_c4d5e6f7:
1113 ; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1114 ; SSE-NEXT: movdqa %xmm1, %xmm0
1117 ; AVX-LABEL: shuffle_v8i16_c4d5e6f7:
1119 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1121 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
1122 ret <8 x i16> %shuffle
; Result lanes 0-3 are lanes 0,2,1,3 of %a; result lanes 4-7 are lanes 4,6,5,7
; of %b. Each half is permuted within its own input and the halves are then
; combined, with a separate expected sequence for each run configuration.
1125 define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
1126 ; SSE2-LABEL: shuffle_v8i16_0213cedf:
1128 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
1129 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]
1130 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1133 ; SSSE3-LABEL: shuffle_v8i16_0213cedf:
1135 ; SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
1136 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]
1137 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1140 ; SSE41-LABEL: shuffle_v8i16_0213cedf:
1142 ; SSE41-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
1143 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1144 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1147 ; AVX1-LABEL: shuffle_v8i16_0213cedf:
1149 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
1150 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1151 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1154 ; AVX2-SLOW-LABEL: shuffle_v8i16_0213cedf:
1155 ; AVX2-SLOW: # %bb.0:
1156 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
1157 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1158 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1159 ; AVX2-SLOW-NEXT: retq
1161 ; AVX2-FAST-LABEL: shuffle_v8i16_0213cedf:
1162 ; AVX2-FAST: # %bb.0:
1163 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,12,13,10,11,14,15]
1164 ; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1165 ; AVX2-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1166 ; AVX2-FAST-NEXT: retq
1168 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_0213cedf:
1169 ; AVX512VL-SLOW: # %bb.0:
1170 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1171 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
1172 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1173 ; AVX512VL-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1174 ; AVX512VL-SLOW-NEXT: retq
1176 ; AVX512VL-FAST-LABEL: shuffle_v8i16_0213cedf:
1177 ; AVX512VL-FAST: # %bb.0:
1178 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,12,13,10,11,14,15]
1179 ; AVX512VL-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1180 ; AVX512VL-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1181 ; AVX512VL-FAST-NEXT: retq
1182 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
1183 ret <8 x i16> %shuffle
; Result lanes 0-3 are a4,a4,a3,b2 (mask 4,4,3,10); result lanes 4-7 are undef.
; Only one lane comes from %b, so most expected sequences first merge that
; lane into %a and then permute a single register.
1186 define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
1187 ; SSE2-LABEL: shuffle_v8i16_443aXXXX:
1189 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535]
1190 ; SSE2-NEXT: pand %xmm2, %xmm0
1191 ; SSE2-NEXT: pandn %xmm1, %xmm2
1192 ; SSE2-NEXT: por %xmm0, %xmm2
1193 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3]
1194 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1197 ; SSSE3-LABEL: shuffle_v8i16_443aXXXX:
1199 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u]
1200 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1201 ; SSSE3-NEXT: por %xmm1, %xmm0
1204 ; SSE41-LABEL: shuffle_v8i16_443aXXXX:
1206 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1207 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1208 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1211 ; AVX1-LABEL: shuffle_v8i16_443aXXXX:
1213 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1214 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1215 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1218 ; AVX2-SLOW-LABEL: shuffle_v8i16_443aXXXX:
1219 ; AVX2-SLOW: # %bb.0:
1220 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1221 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1222 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1223 ; AVX2-SLOW-NEXT: retq
1225 ; AVX2-FAST-LABEL: shuffle_v8i16_443aXXXX:
1226 ; AVX2-FAST: # %bb.0:
1227 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1228 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15]
1229 ; AVX2-FAST-NEXT: retq
1231 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_443aXXXX:
1232 ; AVX512VL-SLOW: # %bb.0:
1233 ; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1234 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1235 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1236 ; AVX512VL-SLOW-NEXT: retq
1238 ; AVX512VL-FAST-LABEL: shuffle_v8i16_443aXXXX:
1239 ; AVX512VL-FAST: # %bb.0:
1240 ; AVX512VL-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1241 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15]
1242 ; AVX512VL-FAST-NEXT: retq
1243 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
1244 ret <8 x i16> %shuffle
; Result lanes 0-3 are a0,a3,a2,b5 (mask 0,3,2,13); result lanes 4-7 are undef.
1247 define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
1248 ; SSE2-LABEL: shuffle_v8i16_032dXXXX:
1250 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1251 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,0]
1252 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,6,5,6,7]
1253 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1254 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1257 ; SSSE3-LABEL: shuffle_v8i16_032dXXXX:
1259 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1260 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1261 ; SSSE3-NEXT: por %xmm1, %xmm0
1264 ; SSE41-LABEL: shuffle_v8i16_032dXXXX:
1266 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1267 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1270 ; AVX1-LABEL: shuffle_v8i16_032dXXXX:
1272 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1273 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1276 ; AVX2OR512VL-LABEL: shuffle_v8i16_032dXXXX:
1277 ; AVX2OR512VL: # %bb.0:
1278 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1279 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1280 ; AVX2OR512VL-NEXT: retq
1281 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1282 ret <8 x i16> %shuffle
; Only result lane 3 is defined: it takes lane 5 of %b (mask index 13). All
; other lanes are undef and no mask index selects from %a.
1284 define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) {
1285 ; SSE-LABEL: shuffle_v8i16_XXXdXXXX:
1287 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1290 ; AVX-LABEL: shuffle_v8i16_XXXdXXXX:
1292 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[2,2,3,3]
1294 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1295 ret <8 x i16> %shuffle
; Result lanes 0-2 are lanes 0-2 of %a in place, result lane 3 is lane 5 of %b
; (mask index 13), and result lanes 4-7 are undef.
1298 define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
1299 ; SSE2-LABEL: shuffle_v8i16_012dXXXX:
1301 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1302 ; SSE2-NEXT: pand %xmm2, %xmm0
1303 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1304 ; SSE2-NEXT: pandn %xmm1, %xmm2
1305 ; SSE2-NEXT: por %xmm2, %xmm0
1308 ; SSSE3-LABEL: shuffle_v8i16_012dXXXX:
1310 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1311 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1312 ; SSSE3-NEXT: por %xmm1, %xmm0
1315 ; SSE41-LABEL: shuffle_v8i16_012dXXXX:
1317 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1318 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1321 ; AVX-LABEL: shuffle_v8i16_012dXXXX:
1323 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1324 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1326 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1327 ret <8 x i16> %shuffle
; Result lanes 0-3 are undef; result lanes 4-6 are lanes 4-6 of %b in place
; (mask 12,13,14) and result lane 7 is lane 3 of %a.
1330 define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
1331 ; SSE2-LABEL: shuffle_v8i16_XXXXcde3:
1333 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0]
1334 ; SSE2-NEXT: pand %xmm2, %xmm1
1335 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1336 ; SSE2-NEXT: pandn %xmm0, %xmm2
1337 ; SSE2-NEXT: por %xmm1, %xmm2
1338 ; SSE2-NEXT: movdqa %xmm2, %xmm0
1341 ; SSSE3-LABEL: shuffle_v8i16_XXXXcde3:
1343 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7]
1344 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero
1345 ; SSSE3-NEXT: por %xmm1, %xmm0
1348 ; SSE41-LABEL: shuffle_v8i16_XXXXcde3:
1350 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1351 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1354 ; AVX1-LABEL: shuffle_v8i16_XXXXcde3:
1356 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1357 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1360 ; AVX2OR512VL-LABEL: shuffle_v8i16_XXXXcde3:
1361 ; AVX2OR512VL: # %bb.0:
1362 ; AVX2OR512VL-NEXT: vpbroadcastq %xmm0, %xmm0
1363 ; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1364 ; AVX2OR512VL-NEXT: retq
1365 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
1366 ret <8 x i16> %shuffle
; Result lanes 0-2 are lanes 4-6 of %b (mask 12,13,14), result lane 3 is lane 3
; of %a in place, and result lanes 4-7 are undef.
1369 define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
1370 ; SSE2-LABEL: shuffle_v8i16_cde3XXXX:
1372 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1373 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1374 ; SSE2-NEXT: pand %xmm2, %xmm1
1375 ; SSE2-NEXT: pandn %xmm0, %xmm2
1376 ; SSE2-NEXT: por %xmm1, %xmm2
1377 ; SSE2-NEXT: movdqa %xmm2, %xmm0
1380 ; SSSE3-LABEL: shuffle_v8i16_cde3XXXX:
1382 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u]
1383 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u]
1384 ; SSSE3-NEXT: por %xmm1, %xmm0
1387 ; SSE41-LABEL: shuffle_v8i16_cde3XXXX:
1389 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1390 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1393 ; AVX-LABEL: shuffle_v8i16_cde3XXXX:
1395 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1396 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1398 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1399 ret <8 x i16> %shuffle
; Fully-defined two-input shuffle: a0,a1,a2,b5,b4,b5,b6,a3
; (mask 0,1,2,13,12,13,14,3). Lane 5 of %b is used twice.
1402 define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
1403 ; SSE2-LABEL: shuffle_v8i16_012dcde3:
1405 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1406 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3,2,1]
1407 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[3,1,2,0,4,5,6,7]
1408 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
1409 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1410 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
1411 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
1412 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7]
1415 ; SSSE3-LABEL: shuffle_v8i16_012dcde3:
1417 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero
1418 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
1419 ; SSSE3-NEXT: por %xmm1, %xmm0
1422 ; SSE41-LABEL: shuffle_v8i16_012dcde3:
1424 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1425 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1428 ; AVX1-LABEL: shuffle_v8i16_012dcde3:
1430 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1431 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1434 ; AVX2OR512VL-LABEL: shuffle_v8i16_012dcde3:
1435 ; AVX2OR512VL: # %bb.0:
1436 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1437 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1438 ; AVX2OR512VL-NEXT: retq
1439 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
1440 ret <8 x i16> %shuffle
; Per-lane blend with no lane movement: result lane i is lane i of %a for
; i in {0,2,3,7} and lane i of %b for i in {1,4,5,6}.
1443 define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) {
1444 ; SSE2-LABEL: shuffle_v8i16_0923cde7:
1446 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1447 ; SSE2-NEXT: andps %xmm2, %xmm0
1448 ; SSE2-NEXT: andnps %xmm1, %xmm2
1449 ; SSE2-NEXT: orps %xmm2, %xmm0
1452 ; SSSE3-LABEL: shuffle_v8i16_0923cde7:
1454 ; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1455 ; SSSE3-NEXT: andps %xmm2, %xmm0
1456 ; SSSE3-NEXT: andnps %xmm1, %xmm2
1457 ; SSSE3-NEXT: orps %xmm2, %xmm0
1460 ; SSE41-LABEL: shuffle_v8i16_0923cde7:
1462 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1465 ; AVX-LABEL: shuffle_v8i16_0923cde7:
1467 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1469 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7>
1470 ret <8 x i16> %shuffle
; Partially-undef two-input shuffle. Defined result lanes: lane 3 = a1,
; lane 5 = a5, lane 6 = a7, lane 7 = b1 (mask index 9). Lanes 0,1,2,4 are undef.
1473 define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
1474 ; SSE2-LABEL: shuffle_v8i16_XXX1X579:
1476 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,2,0]
1477 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0]
1478 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1479 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1480 ; SSE2-NEXT: pand %xmm1, %xmm0
1481 ; SSE2-NEXT: pandn %xmm2, %xmm1
1482 ; SSE2-NEXT: por %xmm0, %xmm1
1483 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1486 ; SSSE3-LABEL: shuffle_v8i16_XXX1X579:
1488 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3]
1489 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero
1490 ; SSSE3-NEXT: por %xmm1, %xmm0
1493 ; SSE41-LABEL: shuffle_v8i16_XXX1X579:
1495 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1496 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1497 ; SSE41-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1498 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1501 ; AVX1-LABEL: shuffle_v8i16_XXX1X579:
1503 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1504 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1505 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1506 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1509 ; AVX2-SLOW-LABEL: shuffle_v8i16_XXX1X579:
1510 ; AVX2-SLOW: # %bb.0:
1511 ; AVX2-SLOW-NEXT: vpbroadcastd %xmm1, %xmm1
1512 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1513 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1514 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1515 ; AVX2-SLOW-NEXT: retq
1517 ; AVX2-FAST-LABEL: shuffle_v8i16_XXX1X579:
1518 ; AVX2-FAST: # %bb.0:
1519 ; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
1520 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15]
1521 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1522 ; AVX2-FAST-NEXT: retq
1524 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_XXX1X579:
1525 ; AVX512VL-SLOW: # %bb.0:
1526 ; AVX512VL-SLOW-NEXT: vpbroadcastd %xmm1, %xmm1
1527 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1528 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1529 ; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1530 ; AVX512VL-SLOW-NEXT: retq
1532 ; AVX512VL-FAST-LABEL: shuffle_v8i16_XXX1X579:
1533 ; AVX512VL-FAST: # %bb.0:
1534 ; AVX512VL-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
1535 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15]
1536 ; AVX512VL-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1537 ; AVX512VL-FAST-NEXT: retq
1538 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
1539 ret <8 x i16> %shuffle
; Partially-undef two-input shuffle. Defined result lanes: lane 2 = a4 and
; lanes 4,5,6 = b0,b2,b4 (mask 8,10,12). Lanes 0,1,3,7 are undef.
1542 define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
1543 ; SSE2-LABEL: shuffle_v8i16_XX4X8acX:
1545 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
1546 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1547 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
1548 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,2],xmm1[2,3]
1551 ; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
1553 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u]
1554 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u]
1555 ; SSSE3-NEXT: por %xmm1, %xmm0
1558 ; SSE41-LABEL: shuffle_v8i16_XX4X8acX:
1560 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,4,5]
1561 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1562 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1565 ; AVX1-LABEL: shuffle_v8i16_XX4X8acX:
1567 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,4,5]
1568 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1569 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1572 ; AVX2OR512VL-LABEL: shuffle_v8i16_XX4X8acX:
1573 ; AVX2OR512VL: # %bb.0:
1574 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1575 ; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1576 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1577 ; AVX2OR512VL-NEXT: retq
1578 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef>
1579 ret <8 x i16> %shuffle
; Scalar %i is inserted into lane 0 of an otherwise undef vector %a. The
; shuffle puts that lane (mask index 8) into result lane 0 and takes result
; lanes 1-7 from the zeroinitializer first operand, so they are zero.
1582 define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
1583 ; SSE-LABEL: shuffle_v8i16_8zzzzzzz:
1585 ; SSE-NEXT: movzwl %di, %eax
1586 ; SSE-NEXT: movd %eax, %xmm0
1589 ; AVX-LABEL: shuffle_v8i16_8zzzzzzz:
1591 ; AVX-NEXT: movzwl %di, %eax
1592 ; AVX-NEXT: vmovd %eax, %xmm0
1594 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1595 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1596 ret <8 x i16> %shuffle
; Scalar %i (lane 0 of %a, mask index 8) lands in result lane 1. Every other
; mask index is below 8 and so selects a lane of the zeroinitializer first
; operand; the particular indices used (2,3,7,6,5,4,3) all read zero.
1599 define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
1600 ; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
1602 ; SSE-NEXT: pxor %xmm0, %xmm0
1603 ; SSE-NEXT: pinsrw $1, %edi, %xmm0
1606 ; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
1608 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1609 ; AVX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0
1611 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1612 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
1613 ret <8 x i16> %shuffle
; Scalar %i (lane 0 of %a, mask index 8) lands in result lane 5. All other
; result lanes read lane 0 of the zeroinitializer first operand, i.e. zero.
1616 define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
1617 ; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
1619 ; SSE-NEXT: pxor %xmm0, %xmm0
1620 ; SSE-NEXT: pinsrw $5, %edi, %xmm0
1623 ; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
1625 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1626 ; AVX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0
1628 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1629 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
1630 ret <8 x i16> %shuffle
; Scalar %i (lane 0 of %a, mask index 8) lands in result lane 7. Result lanes
; 0, 3 and 6 read the zeroinitializer first operand; lanes 1, 2, 4 and 5 are
; undef.
1633 define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
1634 ; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
1636 ; SSE-NEXT: pxor %xmm0, %xmm0
1637 ; SSE-NEXT: pinsrw $7, %edi, %xmm0
1640 ; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
1642 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1643 ; AVX-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0
1645 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1646 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
1647 ret <8 x i16> %shuffle
; Here %i is inserted into lane 3 of %a (not lane 0). Mask index 11 (8 + 3)
; selects that lane and places it in result lane 2; all other result lanes
; read the zeroinitializer first operand.
1650 define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
1651 ; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
1653 ; SSE-NEXT: pxor %xmm0, %xmm0
1654 ; SSE-NEXT: pinsrw $2, %edi, %xmm0
1657 ; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
1659 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1660 ; AVX-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0
1662 %a = insertelement <8 x i16> undef, i16 %i, i32 3
1663 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
1664 ret <8 x i16> %shuffle
; Two-input element rotation: the result is lanes 5-7 of %b followed by lanes
; 0-4 of %a (mask 13,14,15,0,1,2,3,4).
1667 define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) {
1668 ; SSE2-LABEL: shuffle_v8i16_def01234:
1670 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1671 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1672 ; SSE2-NEXT: por %xmm1, %xmm0
1675 ; SSSE3-LABEL: shuffle_v8i16_def01234:
1677 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1680 ; SSE41-LABEL: shuffle_v8i16_def01234:
1682 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1685 ; AVX-LABEL: shuffle_v8i16_def01234:
1687 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1689 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
1690 ret <8 x i16> %shuffle
; Partially-undef two-input shuffle: result lane 1 = b6 (mask index 14) and
; result lanes 4-6 = a1,a2,a3; the remaining lanes are undef. The defined lanes
; are consistent with the full rotation mask 13,14,15,0,1,2,3,4.
1693 define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) {
1694 ; SSE2-LABEL: shuffle_v8i16_ueuu123u:
1696 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1697 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1698 ; SSE2-NEXT: por %xmm1, %xmm0
1701 ; SSSE3-LABEL: shuffle_v8i16_ueuu123u:
1703 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1706 ; SSE41-LABEL: shuffle_v8i16_ueuu123u:
1708 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1711 ; AVX-LABEL: shuffle_v8i16_ueuu123u:
1713 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1715 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1716 ret <8 x i16> %shuffle
; Single-input element rotation of %a: lanes 5,6,7 followed by lanes 0-4.
; No mask index selects from %b.
1719 define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) {
1720 ; SSE2-LABEL: shuffle_v8i16_56701234:
1722 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1723 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1724 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1725 ; SSE2-NEXT: por %xmm1, %xmm0
1728 ; SSSE3-LABEL: shuffle_v8i16_56701234:
1730 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1733 ; SSE41-LABEL: shuffle_v8i16_56701234:
1735 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1738 ; AVX-LABEL: shuffle_v8i16_56701234:
1740 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1742 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
1743 ret <8 x i16> %shuffle
; Single-input shuffle with undef lanes. Lane 1 takes element 6 of %a and lanes 4-6
; take elements 1-3 of %a, %b is unused. The checks expect the same rotation of xmm0
; as a fully specified 5,6,7,0,1,2,3,4 mask would produce.
1746 define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) {
1747 ; SSE2-LABEL: shuffle_v8i16_u6uu123u:
1749 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1750 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1751 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1752 ; SSE2-NEXT: por %xmm1, %xmm0
1755 ; SSSE3-LABEL: shuffle_v8i16_u6uu123u:
1757 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1760 ; SSE41-LABEL: shuffle_v8i16_u6uu123u:
1762 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1765 ; AVX-LABEL: shuffle_v8i16_u6uu123u:
1767 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1769 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1770 ret <8 x i16> %shuffle
; Only lanes 4-6 are defined (elements 1-3 of %a), %b is unused. With nothing required
; in the low lanes the checks expect a single byte shift left by six bytes
; ((v)pslldq) on every configuration.
1773 define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) {
1774 ; SSE-LABEL: shuffle_v8i16_uuuu123u:
1776 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1779 ; AVX-LABEL: shuffle_v8i16_uuuu123u:
1781 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1783 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1784 ret <8 x i16> %shuffle
; Two-input shuffle taking elements 3-7 of %b (indices 11-15) followed by elements 0-2
; of %a. The checks expect a single (v)palignr from SSSE3 upwards and a
; psrldq/pslldq/por sequence on plain SSE2.
1787 define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) {
1788 ; SSE2-LABEL: shuffle_v8i16_bcdef012:
1790 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1791 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1792 ; SSE2-NEXT: por %xmm1, %xmm0
1795 ; SSSE3-LABEL: shuffle_v8i16_bcdef012:
1797 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1800 ; SSE41-LABEL: shuffle_v8i16_bcdef012:
1802 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1805 ; AVX-LABEL: shuffle_v8i16_bcdef012:
1807 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1809 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2>
1810 ret <8 x i16> %shuffle
; Two-input shuffle with undef lanes. Lanes 1-3 take elements 4-6 of %b (indices
; 12-14) and lane 6 takes element 1 of %a. The checks expect the same instructions as
; for the fully specified 11,12,13,14,15,0,1,2 mask.
1813 define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) {
1814 ; SSE2-LABEL: shuffle_v8i16_ucdeuu1u:
1816 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1817 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1818 ; SSE2-NEXT: por %xmm1, %xmm0
1821 ; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u:
1823 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1826 ; SSE41-LABEL: shuffle_v8i16_ucdeuu1u:
1828 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1831 ; AVX-LABEL: shuffle_v8i16_ucdeuu1u:
1833 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1835 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef>
1836 ret <8 x i16> %shuffle
; Single-input rotation by three elements. Every mask index is below 8, so %b is
; unused. The checks expect (v)palignr of xmm0 with itself from SSSE3 upwards, while
; plain SSE2 copies xmm0 and combines two byte shifts with por.
1839 define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) {
1840 ; SSE2-LABEL: shuffle_v8i16_34567012:
1842 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1843 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1844 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1845 ; SSE2-NEXT: por %xmm1, %xmm0
1848 ; SSSE3-LABEL: shuffle_v8i16_34567012:
1850 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1853 ; SSE41-LABEL: shuffle_v8i16_34567012:
1855 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1858 ; AVX-LABEL: shuffle_v8i16_34567012:
1860 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1862 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>
1863 ret <8 x i16> %shuffle
; Single-input shuffle with undef lanes. Lanes 1-3 take elements 4-6 of %a and lane 6
; takes element 1 of %a, %b is unused. The checks expect the same rotation of xmm0 as
; a fully specified 3,4,5,6,7,0,1,2 mask would produce.
1866 define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) {
1867 ; SSE2-LABEL: shuffle_v8i16_u456uu1u:
1869 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1870 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1871 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1872 ; SSE2-NEXT: por %xmm1, %xmm0
1875 ; SSSE3-LABEL: shuffle_v8i16_u456uu1u:
1877 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1880 ; SSE41-LABEL: shuffle_v8i16_u456uu1u:
1882 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1885 ; AVX-LABEL: shuffle_v8i16_u456uu1u:
1887 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1889 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef>
1890 ret <8 x i16> %shuffle
; Only lanes 1-3 are defined (elements 4-6 of %a), %b is unused. With nothing required
; in the high lanes the checks expect a single byte shift right by six bytes
; ((v)psrldq) on every configuration.
1893 define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) {
1894 ; SSE-LABEL: shuffle_v8i16_u456uuuu:
1896 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1899 ; AVX-LABEL: shuffle_v8i16_u456uuuu:
1901 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1903 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
1904 ret <8 x i16> %shuffle
; Two-input shuffle taking elements 3-7 of %a followed by elements 0-2 of %b (indices
; 8-10). The SSSE3 and SSE4.1 checks expect palignr into xmm1 plus a movdqa back to
; xmm0, the AVX checks expect one vpalignr writing xmm0 directly, and plain SSE2 uses
; a psrldq/pslldq/por sequence.
1907 define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) {
1908 ; SSE2-LABEL: shuffle_v8i16_3456789a:
1910 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1911 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
1912 ; SSE2-NEXT: por %xmm1, %xmm0
1915 ; SSSE3-LABEL: shuffle_v8i16_3456789a:
1917 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1918 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1921 ; SSE41-LABEL: shuffle_v8i16_3456789a:
1923 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1924 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1927 ; AVX-LABEL: shuffle_v8i16_3456789a:
1929 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1931 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
1932 ret <8 x i16> %shuffle
; Two-input shuffle with undef lanes. Lanes 1-3 take elements 4-6 of %a and lane 6
; takes element 1 of %b (index 9). The checks expect the same instructions as for the
; fully specified 3,4,5,6,7,8,9,10 mask.
1935 define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) {
1936 ; SSE2-LABEL: shuffle_v8i16_u456uu9u:
1938 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1939 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
1940 ; SSE2-NEXT: por %xmm1, %xmm0
1943 ; SSSE3-LABEL: shuffle_v8i16_u456uu9u:
1945 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1946 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1949 ; SSE41-LABEL: shuffle_v8i16_u456uu9u:
1951 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1952 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1955 ; AVX-LABEL: shuffle_v8i16_u456uu9u:
1957 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1959 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef>
1960 ret <8 x i16> %shuffle
; Two-input shuffle taking elements 5-7 of %a followed by elements 0-4 of %b (indices
; 8-12). The SSSE3 and SSE4.1 checks expect palignr into xmm1 plus a movdqa back to
; xmm0, the AVX checks expect one vpalignr, and plain SSE2 uses psrldq/pslldq/por.
1963 define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) {
1964 ; SSE2-LABEL: shuffle_v8i16_56789abc:
1966 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1967 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
1968 ; SSE2-NEXT: por %xmm1, %xmm0
1971 ; SSSE3-LABEL: shuffle_v8i16_56789abc:
1973 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1974 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1977 ; SSE41-LABEL: shuffle_v8i16_56789abc:
1979 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1980 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1983 ; AVX-LABEL: shuffle_v8i16_56789abc:
1985 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1987 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
1988 ret <8 x i16> %shuffle
; Two-input shuffle with undef lanes. Lane 1 takes element 6 of %a and lanes 4-6 take
; elements 1-3 of %b (indices 9-11). The checks expect the same instructions as for
; the fully specified 5,6,7,8,9,10,11,12 mask.
1991 define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) {
1992 ; SSE2-LABEL: shuffle_v8i16_u6uu9abu:
1994 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1995 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
1996 ; SSE2-NEXT: por %xmm1, %xmm0
1999 ; SSSE3-LABEL: shuffle_v8i16_u6uu9abu:
2001 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2002 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
2005 ; SSE41-LABEL: shuffle_v8i16_u6uu9abu:
2007 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2008 ; SSE41-NEXT: movdqa %xmm1, %xmm0
2011 ; AVX-LABEL: shuffle_v8i16_u6uu9abu:
2013 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2015 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
2016 ret <8 x i16> %shuffle
; Places element 0 of %a in lane 0 and element 1 in lane 4, all other lanes are undef.
; The SSE4.1 and AVX checks expect a single (v)pmovzxwq, while SSE2 and SSSE3 expect a
; pshufd followed by pshufhw.
2019 define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) {
2020 ; SSE2-LABEL: shuffle_v8i16_0uuu1uuu:
2022 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2023 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
2026 ; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu:
2028 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2029 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
2032 ; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
2034 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2037 ; AVX-LABEL: shuffle_v8i16_0uuu1uuu:
2039 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2041 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
2042 ret <8 x i16> %shuffle
; Places element 0 of %a in lane 0 and element 1 in lane 4. Every other lane selects
; from the zeroinitializer operand (indices 8-15), so the result is elements 0 and 1
; widened to 64 bits with zeros. The SSE4.1 and AVX checks expect (v)pmovzxwq, while
; SSE2 and SSSE3 expect pxor plus two unpacks against the zero register.
2045 define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) {
2046 ; SSE2-LABEL: shuffle_v8i16_0zzz1zzz:
2048 ; SSE2-NEXT: pxor %xmm1, %xmm1
2049 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2050 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2053 ; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz:
2055 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2056 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2057 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2060 ; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
2062 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2065 ; AVX-LABEL: shuffle_v8i16_0zzz1zzz:
2067 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2069 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
2070 ret <8 x i16> %shuffle
; Spreads elements 0-3 of %a into the even lanes and leaves the odd lanes undef. The
; SSE2 and SSSE3 checks expect punpcklwd of xmm0 with itself, while the SSE4.1 and AVX
; checks expect a single (v)pmovzxwd.
2073 define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) {
2074 ; SSE2-LABEL: shuffle_v8i16_0u1u2u3u:
2076 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2079 ; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u:
2081 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2084 ; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
2086 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2089 ; AVX-LABEL: shuffle_v8i16_0u1u2u3u:
2091 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2093 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
2094 ret <8 x i16> %shuffle
; Interleaves elements 0-3 of %a with lanes of the zeroinitializer operand, which
; widens each of them to 32 bits with zeros. The SSE2 and SSSE3 checks expect pxor
; plus punpcklwd against the zero register, while the SSE4.1 and AVX checks expect a
; single (v)pmovzxwd.
2097 define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
2098 ; SSE2-LABEL: shuffle_v8i16_0z1z2z3z:
2100 ; SSE2-NEXT: pxor %xmm1, %xmm1
2101 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2104 ; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z:
2106 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2107 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2110 ; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
2112 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2115 ; AVX-LABEL: shuffle_v8i16_0z1z2z3z:
2117 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2119 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
2120 ret <8 x i16> %shuffle
; Repeats the pattern 0,1,1,0 of %a in both halves of the result. The expected code
; depends on the run configuration. SSE, AVX1 and the AVX512VL run without
; fast-variable-shuffle use (v)pshuflw plus (v)pshufd, the AVX2 run without
; fast-variable-shuffle replaces the second step with vpbroadcastq, and both
; fast-variable-shuffle runs use a single vpshufb.
2123 define <8 x i16> @shuffle_v8i16_01100110(<8 x i16> %a) {
2124 ; SSE-LABEL: shuffle_v8i16_01100110:
2126 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2127 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2130 ; AVX1-LABEL: shuffle_v8i16_01100110:
2132 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2133 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2136 ; AVX2-SLOW-LABEL: shuffle_v8i16_01100110:
2137 ; AVX2-SLOW: # %bb.0:
2138 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2139 ; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
2140 ; AVX2-SLOW-NEXT: retq
2142 ; AVX2-FAST-LABEL: shuffle_v8i16_01100110:
2143 ; AVX2-FAST: # %bb.0:
2144 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2145 ; AVX2-FAST-NEXT: retq
2147 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_01100110:
2148 ; AVX512VL-SLOW: # %bb.0:
2149 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2150 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2151 ; AVX512VL-SLOW-NEXT: retq
2153 ; AVX512VL-FAST-LABEL: shuffle_v8i16_01100110:
2154 ; AVX512VL-FAST: # %bb.0:
2155 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2156 ; AVX512VL-FAST-NEXT: retq
2157 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 1, i32 0>
2158 ret <8 x i16> %shuffle
; Same 0,1,1,0 pattern of %a in both halves, but with lanes 2 and 4 left undef. The
; checks expect exactly the instructions of the fully specified mask, so the undef
; lanes are filled in to complete the repeated pattern.
2161 define <8 x i16> @shuffle_v8i16_01u0u110(<8 x i16> %a) {
2162 ; SSE-LABEL: shuffle_v8i16_01u0u110:
2164 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2165 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2168 ; AVX1-LABEL: shuffle_v8i16_01u0u110:
2170 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2171 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2174 ; AVX2-SLOW-LABEL: shuffle_v8i16_01u0u110:
2175 ; AVX2-SLOW: # %bb.0:
2176 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2177 ; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
2178 ; AVX2-SLOW-NEXT: retq
2180 ; AVX2-FAST-LABEL: shuffle_v8i16_01u0u110:
2181 ; AVX2-FAST: # %bb.0:
2182 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2183 ; AVX2-FAST-NEXT: retq
2185 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_01u0u110:
2186 ; AVX512VL-SLOW: # %bb.0:
2187 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2188 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2189 ; AVX512VL-SLOW-NEXT: retq
2191 ; AVX512VL-FAST-LABEL: shuffle_v8i16_01u0u110:
2192 ; AVX512VL-FAST: # %bb.0:
2193 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2194 ; AVX512VL-FAST-NEXT: retq
2195 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 0, i32 undef, i32 1, i32 1, i32 0>
2196 ret <8 x i16> %shuffle
; Shuffle that reads only the upper four elements of %a and leaves lanes 3 and 4
; undef. The runs without fast-variable-shuffle expect (v)pshufhw followed by
; (v)pshufd, and both fast-variable-shuffle runs expect a single vpshufb.
2199 define <8 x i16> @shuffle_v8i16_467uu675(<8 x i16> %a) {
2200 ; SSE-LABEL: shuffle_v8i16_467uu675:
2202 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2203 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2206 ; AVX1-LABEL: shuffle_v8i16_467uu675:
2208 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2209 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2212 ; AVX2-SLOW-LABEL: shuffle_v8i16_467uu675:
2213 ; AVX2-SLOW: # %bb.0:
2214 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2215 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2216 ; AVX2-SLOW-NEXT: retq
2218 ; AVX2-FAST-LABEL: shuffle_v8i16_467uu675:
2219 ; AVX2-FAST: # %bb.0:
2220 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11]
2221 ; AVX2-FAST-NEXT: retq
2223 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_467uu675:
2224 ; AVX512VL-SLOW: # %bb.0:
2225 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2226 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2227 ; AVX512VL-SLOW-NEXT: retq
2229 ; AVX512VL-FAST-LABEL: shuffle_v8i16_467uu675:
2230 ; AVX512VL-FAST: # %bb.0:
2231 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11]
2232 ; AVX512VL-FAST-NEXT: retq
2233 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7, i32 5>
2234 ret <8 x i16> %shuffle
2238 ; Shuffle to logical bit shifts
; Index 8 selects a lane of the zeroinitializer operand, so each 32-bit pair becomes
; (zero, even element of %a). The checks expect this to be recognised as a left shift
; of every 32-bit lane by 16 bits ((v)pslld).
2240 define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) {
2241 ; SSE-LABEL: shuffle_v8i16_z0z2z4z6:
2243 ; SSE-NEXT: pslld $16, %xmm0
2246 ; AVX-LABEL: shuffle_v8i16_z0z2z4z6:
2248 ; AVX-NEXT: vpslld $16, %xmm0, %xmm0
2250 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
2251 ret <8 x i16> %shuffle
; Each 64-bit half is three zero lanes (index 8) followed by the lowest element of
; that half of %a. The checks expect a left shift of every 64-bit lane by 48 bits
; ((v)psllq).
2254 define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) {
2255 ; SSE-LABEL: shuffle_v8i16_zzz0zzz4:
2257 ; SSE-NEXT: psllq $48, %xmm0
2260 ; AVX-LABEL: shuffle_v8i16_zzz0zzz4:
2262 ; AVX-NEXT: vpsllq $48, %xmm0, %xmm0
2264 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
2265 ret <8 x i16> %shuffle
; Mask with zero lanes (index 8) and undef lanes (written X in the name) that is
; compatible with moving each 64-bit lane up by two elements. The checks expect a
; left shift of every 64-bit lane by 32 bits ((v)psllq).
2268 define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) {
2269 ; SSE-LABEL: shuffle_v8i16_zz01zX4X:
2271 ; SSE-NEXT: psllq $32, %xmm0
2274 ; AVX-LABEL: shuffle_v8i16_zz01zX4X:
2276 ; AVX-NEXT: vpsllq $32, %xmm0, %xmm0
2278 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef>
2279 ret <8 x i16> %shuffle
; Mask with a zero lane (index 8) at the bottom of each 64-bit half and one undef lane
; (written X in the name), compatible with moving each half up by one element. The
; checks expect a left shift of every 64-bit lane by 16 bits ((v)psllq).
2282 define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) {
2283 ; SSE-LABEL: shuffle_v8i16_z0X2z456:
2285 ; SSE-NEXT: psllq $16, %xmm0
2288 ; AVX-LABEL: shuffle_v8i16_z0X2z456:
2290 ; AVX-NEXT: vpsllq $16, %xmm0, %xmm0
2292 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6>
2293 ret <8 x i16> %shuffle
; Each 32-bit pair is (odd element of %a, zero), with lane 4 left undef (written X in
; the name). The checks expect a logical right shift of every 32-bit lane by 16 bits
; ((v)psrld).
2296 define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) {
2297 ; SSE-LABEL: shuffle_v8i16_1z3zXz7z:
2299 ; SSE-NEXT: psrld $16, %xmm0
2302 ; AVX-LABEL: shuffle_v8i16_1z3zXz7z:
2304 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
2306 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8>
2307 ret <8 x i16> %shuffle
; Mask with a zero lane (index 8) at the top of each 64-bit half and one undef lane
; (written X in the name), compatible with moving each half down by one element. The
; checks expect a logical right shift of every 64-bit lane by 16 bits ((v)psrlq).
2310 define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) {
2311 ; SSE-LABEL: shuffle_v8i16_1X3z567z:
2313 ; SSE-NEXT: psrlq $16, %xmm0
2316 ; AVX-LABEL: shuffle_v8i16_1X3z567z:
2318 ; AVX-NEXT: vpsrlq $16, %xmm0, %xmm0
2320 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
2321 ret <8 x i16> %shuffle
; Each 64-bit half keeps its upper two elements of %a in the low positions and takes
; zero lanes (index 8) above them. The checks expect a logical right shift of every
; 64-bit lane by 32 bits ((v)psrlq).
2324 define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) {
2325 ; SSE-LABEL: shuffle_v8i16_23zz67zz:
2327 ; SSE-NEXT: psrlq $32, %xmm0
2330 ; AVX-LABEL: shuffle_v8i16_23zz67zz:
2332 ; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
2334 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8>
2335 ret <8 x i16> %shuffle
; Lane 0 takes element 3 of %a, lanes 2-4 are undef (written X in the name) and the
; remaining lanes are zero (index 8). The checks expect a logical right shift of
; every 64-bit lane by 48 bits ((v)psrlq).
2338 define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) {
2339 ; SSE-LABEL: shuffle_v8i16_3zXXXzzz:
2341 ; SSE-NEXT: psrlq $48, %xmm0
2344 ; AVX-LABEL: shuffle_v8i16_3zXXXzzz:
2346 ; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
2348 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8>
2349 ret <8 x i16> %shuffle
; Keeps the low four elements of %a in place (lane 2 undef) and requires zero or undef
; in the upper four lanes. The checks expect a single (v)movq, which keeps the low
; 64 bits of xmm0 and zeroes the rest.
2352 define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
2353 ; SSE-LABEL: shuffle_v8i16_01u3zzuz:
2355 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2358 ; AVX-LABEL: shuffle_v8i16_01u3zzuz:
2360 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2362 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8>
2363 ret <8 x i16> %shuffle
; Identity shuffle of %a except lane 1, which selects a zero lane (index 9). The SSE2
; and SSSE3 checks expect an andps with a memory operand addressed off rip, while the
; SSE4.1 and AVX checks expect a zeroed register blended in with (v)pblendw.
2366 define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) {
2367 ; SSE2-LABEL: shuffle_v8i16_0z234567:
2369 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
2372 ; SSSE3-LABEL: shuffle_v8i16_0z234567:
2374 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
2377 ; SSE41-LABEL: shuffle_v8i16_0z234567:
2379 ; SSE41-NEXT: pxor %xmm1, %xmm1
2380 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2383 ; AVX-LABEL: shuffle_v8i16_0z234567:
2385 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2386 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2388 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2389 ret <8 x i16> %shuffle
; Keeps elements 0, 5 and 7 of %a in place and takes zero lanes (index 8) everywhere
; else. The SSE2 and SSSE3 checks expect an andps with a memory operand addressed off
; rip, while the SSE4.1 and AVX checks expect a zeroed register blended in with
; (v)pblendw.
2392 define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) {
2393 ; SSE2-LABEL: shuffle_v8i16_0zzzz5z7:
2395 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
2398 ; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7:
2400 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
2403 ; SSE41-LABEL: shuffle_v8i16_0zzzz5z7:
2405 ; SSE41-NEXT: pxor %xmm1, %xmm1
2406 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2409 ; AVX-LABEL: shuffle_v8i16_0zzzz5z7:
2411 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2412 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2414 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7>
2415 ret <8 x i16> %shuffle
; Identity shuffle of %a with lanes 1 and 7 replaced by zero lanes (indices 9 and 15).
; The SSE2 and SSSE3 checks expect an andps with a memory operand addressed off rip,
; while the SSE4.1 and AVX checks expect a zeroed register blended in with (v)pblendw.
; NOTE(review) - the function name suggests that only lane 7 is zeroed, but the mask
; also selects index 9 for lane 1 and the expected pblendw takes lane 1 from the zero
; register. Confirm whether the mask or the name is the intended one before relying
; on the name.
2418 define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
2419 ; SSE2-LABEL: shuffle_v8i16_0123456z:
2421 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
2424 ; SSSE3-LABEL: shuffle_v8i16_0123456z:
2426 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
2429 ; SSE41-LABEL: shuffle_v8i16_0123456z:
2431 ; SSE41-NEXT: pxor %xmm1, %xmm1
2432 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6],xmm1[7]
2435 ; AVX-LABEL: shuffle_v8i16_0123456z:
2437 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2438 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6],xmm1[7]
2440 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
2441 ret <8 x i16> %shuffle
; Irregular two-input shuffle. Lanes 0, 4 and 5 read %b (indices 15, 12, 12), lanes 2
; and 6 read %a (indices 3 and 5), and the odd lanes 1, 3 and 7 are undef. The checks
; expect a byte shift of %a, a (v)pshufhw of %b and a (v)punpckhdq joining the two.
; The SSE form needs a final movdqa because the unpack result lands in xmm1.
2444 define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
2445 ; SSE-LABEL: shuffle_v8i16_fu3ucc5u:
2447 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2448 ; SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2449 ; SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2450 ; SSE-NEXT: movdqa %xmm1, %xmm0
2453 ; AVX-LABEL: shuffle_v8i16_fu3ucc5u:
2455 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2456 ; AVX-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2457 ; AVX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2459 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
2460 ret <8 x i16> %shuffle
; Lane 0 takes a zero lane (index 8), lanes 1-6 take elements 0-5 of %a and lane 7 is
; undef. The checks expect a single byte shift left by two bytes ((v)pslldq), which
; also fills lane 7 with element 6.
2463 define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
2464 ; SSE-LABEL: shuffle_v8i16_8012345u:
2466 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2469 ; AVX-LABEL: shuffle_v8i16_8012345u:
2471 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2473 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef>
2475 ret <8 x i16> %shuffle
; Here zeroinitializer is the FIRST shuffle operand and %x the second. Lane 0 takes
; element 1 of %x (index 9), lanes 1-3 take zero lanes (indices 1-3) and the upper
; half is undef. The runs without fast-variable-shuffle expect a left byte shift
; followed by a right byte shift, and both fast-variable-shuffle runs expect a
; single vpshufb.
2479 define <8 x i16> @shuffle_v8i16_9zzzuuuu(<8 x i16> %x) {
2480 ; SSE-LABEL: shuffle_v8i16_9zzzuuuu:
2482 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
2483 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2486 ; AVX1-LABEL: shuffle_v8i16_9zzzuuuu:
2488 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
2489 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2492 ; AVX2-SLOW-LABEL: shuffle_v8i16_9zzzuuuu:
2493 ; AVX2-SLOW: # %bb.0:
2494 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
2495 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2496 ; AVX2-SLOW-NEXT: retq
2498 ; AVX2-FAST-LABEL: shuffle_v8i16_9zzzuuuu:
2499 ; AVX2-FAST: # %bb.0:
2500 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2501 ; AVX2-FAST-NEXT: retq
2503 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_9zzzuuuu:
2504 ; AVX512VL-SLOW: # %bb.0:
2505 ; AVX512VL-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
2506 ; AVX512VL-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2507 ; AVX512VL-SLOW-NEXT: retq
2509 ; AVX512VL-FAST-LABEL: shuffle_v8i16_9zzzuuuu:
2510 ; AVX512VL-FAST: # %bb.0:
2511 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2512 ; AVX512VL-FAST-NEXT: retq
2513 %r = shufflevector <8 x i16> zeroinitializer, <8 x i16> %x, <8 x i32> <i32 9, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; Lane 0 takes element 2 of %x, lanes 1-3 take zero lanes (indices 9-11) and the upper
; half is undef. The runs without fast-variable-shuffle expect a left byte shift
; followed by a right byte shift, and both fast-variable-shuffle runs expect a
; single vpshufb.
2518 define <8 x i16> @shuffle_v8i16_2zzzuuuu(<8 x i16> %x) {
2519 ; SSE-LABEL: shuffle_v8i16_2zzzuuuu:
2521 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2522 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2525 ; AVX1-LABEL: shuffle_v8i16_2zzzuuuu:
2527 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2528 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2531 ; AVX2-SLOW-LABEL: shuffle_v8i16_2zzzuuuu:
2532 ; AVX2-SLOW: # %bb.0:
2533 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2534 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2535 ; AVX2-SLOW-NEXT: retq
2537 ; AVX2-FAST-LABEL: shuffle_v8i16_2zzzuuuu:
2538 ; AVX2-FAST: # %bb.0:
2539 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2540 ; AVX2-FAST-NEXT: retq
2542 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_2zzzuuuu:
2543 ; AVX512VL-SLOW: # %bb.0:
2544 ; AVX512VL-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2545 ; AVX512VL-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2546 ; AVX512VL-SLOW-NEXT: retq
2548 ; AVX512VL-FAST-LABEL: shuffle_v8i16_2zzzuuuu:
2549 ; AVX512VL-FAST: # %bb.0:
2550 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2551 ; AVX512VL-FAST-NEXT: retq
2552 %r = shufflevector <8 x i16> %x, <8 x i16> zeroinitializer, <8 x i32> <i32 2, i32 9, i32 10, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
; Lane 0 takes element 3 of %x, lane 3 takes element 6, lanes 1-2 are undef and the
; upper half takes zero lanes (index 8). The runs without fast-variable-shuffle
; expect a left byte shift followed by a right byte shift, and both
; fast-variable-shuffle runs expect a single vpshufb.
2556 define <8 x i16> @shuffle_v8i16_3uu6zzzz(<8 x i16> %x) {
2557 ; SSE-LABEL: shuffle_v8i16_3uu6zzzz:
2559 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2560 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
2563 ; AVX1-LABEL: shuffle_v8i16_3uu6zzzz:
2565 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2566 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
2569 ; AVX2-SLOW-LABEL: shuffle_v8i16_3uu6zzzz:
2570 ; AVX2-SLOW: # %bb.0:
2571 ; AVX2-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2572 ; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
2573 ; AVX2-SLOW-NEXT: retq
2575 ; AVX2-FAST-LABEL: shuffle_v8i16_3uu6zzzz:
2576 ; AVX2-FAST: # %bb.0:
2577 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13],zero,zero,zero,zero,zero,zero,zero,zero
2578 ; AVX2-FAST-NEXT: retq
2580 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_3uu6zzzz:
2581 ; AVX512VL-SLOW: # %bb.0:
2582 ; AVX512VL-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2583 ; AVX512VL-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
2584 ; AVX512VL-SLOW-NEXT: retq
2586 ; AVX512VL-FAST-LABEL: shuffle_v8i16_3uu6zzzz:
2587 ; AVX512VL-FAST: # %bb.0:
2588 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13],zero,zero,zero,zero,zero,zero,zero,zero
2589 ; AVX512VL-FAST-NEXT: retq
2590 %r = shufflevector <8 x i16> %x, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 8, i32 8, i32 8, i32 8>
; Blend written with bitwise masks instead of shufflevector. Both inputs are viewed as
; <2 x i64>. %a keeps only the upper 32 bits of its high i64 (its i16 elements 6 and
; 7) and %b keeps its low i64 plus the lower 32 bits of its high i64 (its i16 elements
; 0-5). The two are then OR-ed together. The SSE4.1 and AVX checks expect this to be
; recognised as a single (v)blendps, while SSE2 and SSSE3 expect two shufps plus a
; movaps.
2594 define <8 x i16> @mask_v8i16_012345ef(<8 x i16> %a, <8 x i16> %b) {
2595 ; SSE2-LABEL: mask_v8i16_012345ef:
2597 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
2598 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
2599 ; SSE2-NEXT: movaps %xmm1, %xmm0
2602 ; SSSE3-LABEL: mask_v8i16_012345ef:
2604 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
2605 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
2606 ; SSSE3-NEXT: movaps %xmm1, %xmm0
2609 ; SSE41-LABEL: mask_v8i16_012345ef:
2611 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
2614 ; AVX-LABEL: mask_v8i16_012345ef:
2616 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
2618 %1 = bitcast <8 x i16> %a to <2 x i64>
2619 %2 = bitcast <8 x i16> %b to <2 x i64>
2620 %3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
2621 %4 = and <2 x i64> %2, <i64 -1, i64 4294967295>
2622 %5 = or <2 x i64> %4, %3
2623 %6 = bitcast <2 x i64> %5 to <8 x i16>
; Loads an i32, inserts it into lane 0 of a zero <4 x i32>, reinterprets that as
; <8 x i16> and splats i16 lane 0 (an all-zero shuffle mask). The AVX2 and AVX512VL
; checks expect the whole sequence to fold into one vpbroadcastw from memory, while
; SSE and AVX1 expect a (v)movd load followed by (v)pshuflw and (v)pshufd.
2627 define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
2628 ; SSE-LABEL: insert_dup_mem_v8i16_i32:
2630 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2631 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2632 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2635 ; AVX1-LABEL: insert_dup_mem_v8i16_i32:
2637 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2638 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2639 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2642 ; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_i32:
2643 ; AVX2OR512VL: # %bb.0:
2644 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %xmm0
2645 ; AVX2OR512VL-NEXT: retq
2646 %tmp = load i32, i32* %ptr, align 4
2647 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2648 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
2649 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer
; Splat of i16 element 0 where the source is an i16 load sign-extended to i32
; before being inserted into lane 0 of a zero <4 x i32>. Every configuration is
; expected to keep the sign-extending movswl load. SSE and AVX1 then shuffle,
; AVX2 uses vmovd followed by a register vpbroadcastw, and AVX512VL broadcasts
; straight from %eax.
2653 define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
2654 ; SSE-LABEL: insert_dup_mem_v8i16_sext_i16:
2656 ; SSE-NEXT: movswl (%rdi), %eax
2657 ; SSE-NEXT: movd %eax, %xmm0
2658 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2659 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2662 ; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
2664 ; AVX1-NEXT: movswl (%rdi), %eax
2665 ; AVX1-NEXT: vmovd %eax, %xmm0
2666 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2667 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2670 ; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
2672 ; AVX2-NEXT: movswl (%rdi), %eax
2673 ; AVX2-NEXT: vmovd %eax, %xmm0
2674 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
2677 ; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
2678 ; AVX512VL: # %bb.0:
2679 ; AVX512VL-NEXT: movswl (%rdi), %eax
2680 ; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
2681 ; AVX512VL-NEXT: retq
2682 %tmp = load i16, i16* %ptr, align 2
2683 %tmp1 = sext i16 %tmp to i32
2684 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
2685 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
; A zeroinitializer mask selects element 0 for every result lane.
2686 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer
; Splat of i16 element 1 taken from an i32 load placed in lane 0 of a zero
; <4 x i32>. Element 1 is the second 16-bit half of the loaded value, which is
; why the AVX2 and AVX512VL checks expect the load folded into a vpbroadcastw
; from 2(%rdi). SSE and AVX1 are expected to load with (v)movd and shuffle
; word 1 into place.
2690 define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
2691 ; SSE-LABEL: insert_dup_elt1_mem_v8i16_i32:
2693 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2694 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
2695 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2698 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
2700 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2701 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
2702 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2705 ; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v8i16_i32:
2706 ; AVX2OR512VL: # %bb.0:
2707 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %xmm0
2708 ; AVX2OR512VL-NEXT: retq
2709 %tmp = load i32, i32* %ptr, align 4
2710 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2711 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
; Every result lane reads element 1 of the bitcast vector.
2712 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Splat of i16 element 3 where the i32 load is inserted into lane 1 (not lane
; 0) of a zero <4 x i32>. Element 3 is the second 16-bit half of that lane, so
; the AVX2 and AVX512VL checks again expect a folded vpbroadcastw from 2(%rdi).
; The remaining configurations differ. SSE2 uses pshuflw plus pshufd, SSSE3 and
; SSE4.1 use a single pshufb after the movd load, and AVX1 starts from a
; vbroadcastss load before shuffling.
2716 define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
2717 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32:
2719 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2720 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,0,1,4,5,6,7]
2721 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2724 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:
2726 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2727 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2730 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32:
2732 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2733 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2736 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
2738 ; AVX1-NEXT: vbroadcastss (%rdi), %xmm0
2739 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7]
2740 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2743 ; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v8i16_i32:
2744 ; AVX2OR512VL: # %bb.0:
2745 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %xmm0
2746 ; AVX2OR512VL-NEXT: retq
2747 %tmp = load i32, i32* %ptr, align 4
; The loaded value goes into lane 1 here, unlike the elt1 variant which uses lane 0.
2748 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
2749 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
; Every result lane reads element 3 of the bitcast vector.
2750 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Splat of i16 element 1 of a sign-extended i16 load inserted into lane 0 of a
; zero <4 x i32>. Element 1 is the second 16-bit half of the sext result, so
; the broadcast value consists of the extension bits rather than the loaded
; i16. This is reflected in the AVX2 and AVX512VL checks, which expect movswl
; followed by a 16-bit right shift before broadcasting. SSE and AVX1 are
; expected to move the extended value into xmm0 and shuffle word 1 into place.
2754 define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
2755 ; SSE-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2757 ; SSE-NEXT: movswl (%rdi), %eax
2758 ; SSE-NEXT: movd %eax, %xmm0
2759 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
2760 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2763 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2765 ; AVX1-NEXT: movswl (%rdi), %eax
2766 ; AVX1-NEXT: vmovd %eax, %xmm0
2767 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
2768 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2771 ; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2773 ; AVX2-NEXT: movswl (%rdi), %eax
2774 ; AVX2-NEXT: shrl $16, %eax
2775 ; AVX2-NEXT: vmovd %eax, %xmm0
2776 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
2779 ; AVX512VL-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2780 ; AVX512VL: # %bb.0:
2781 ; AVX512VL-NEXT: movswl (%rdi), %eax
2782 ; AVX512VL-NEXT: shrl $16, %eax
2783 ; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
2784 ; AVX512VL-NEXT: retq
2785 %tmp = load i16, i16* %ptr, align 2
2786 %tmp1 = sext i16 %tmp to i32
2787 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
2788 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
; Every result lane reads element 1 of the bitcast vector.
2789 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Splat of i16 element 3 of a sign-extended i16 load inserted into lane 1 of a
; zero <4 x i32>. Element 3 is the second 16-bit half of that lane, so the
; broadcast value again comes from the extension bits. The AVX2 and AVX512VL
; checks expect movswl plus a 16-bit right shift ahead of the broadcast. SSE2
; is expected to use pshuflw plus pshufd, while SSSE3, SSE4.1 and AVX1 use a
; single (v)pshufb that replicates bytes 2 and 3.
2793 define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
2794 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2796 ; SSE2-NEXT: movswl (%rdi), %eax
2797 ; SSE2-NEXT: movd %eax, %xmm0
2798 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,0,1,4,5,6,7]
2799 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2802 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2804 ; SSSE3-NEXT: movswl (%rdi), %eax
2805 ; SSSE3-NEXT: movd %eax, %xmm0
2806 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2809 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2811 ; SSE41-NEXT: movswl (%rdi), %eax
2812 ; SSE41-NEXT: movd %eax, %xmm0
2813 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2816 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2818 ; AVX1-NEXT: movswl (%rdi), %eax
2819 ; AVX1-NEXT: vmovd %eax, %xmm0
2820 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2823 ; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2825 ; AVX2-NEXT: movswl (%rdi), %eax
2826 ; AVX2-NEXT: shrl $16, %eax
2827 ; AVX2-NEXT: vmovd %eax, %xmm0
2828 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
2831 ; AVX512VL-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2832 ; AVX512VL: # %bb.0:
2833 ; AVX512VL-NEXT: movswl (%rdi), %eax
2834 ; AVX512VL-NEXT: shrl $16, %eax
2835 ; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
2836 ; AVX512VL-NEXT: retq
2837 %tmp = load i16, i16* %ptr, align 2
2838 %tmp1 = sext i16 %tmp to i32
; The extended value goes into lane 1 here, unlike the elt1 variant which uses lane 0.
2839 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1
2840 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
; Every result lane reads element 3 of the bitcast vector.
2841 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>