1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512VL
; Splat of the low element: mask <0, 0> fills both result lanes with %a[0].
; %b is passed but never selected by the mask.
10 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
11 ; SSE-LABEL: shuffle_v2i64_00:
13 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
16 ; AVX1-LABEL: shuffle_v2i64_00:
18 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
21 ; AVX2-LABEL: shuffle_v2i64_00:
23 ; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
26 ; AVX512VL-LABEL: shuffle_v2i64_00:
28 ; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
30 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
31 ret <2 x i64> %shuffle
; Element swap: mask <1, 0> yields <%a[1], %a[0]>. %b is never selected.
33 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
34 ; SSE-LABEL: shuffle_v2i64_10:
36 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
39 ; AVX-LABEL: shuffle_v2i64_10:
41 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
43 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
44 ret <2 x i64> %shuffle
; Splat of the high element: mask <1, 1> fills both result lanes with %a[1].
; %b is never selected.
46 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
47 ; SSE-LABEL: shuffle_v2i64_11:
49 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
52 ; AVX-LABEL: shuffle_v2i64_11:
54 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
56 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
57 ret <2 x i64> %shuffle
; Splat taken from the second operand: mask index 2 refers to %b[0], so the
; result is <%b[0], %b[0]>. %a is never selected.
59 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
60 ; SSE-LABEL: shuffle_v2i64_22:
62 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
65 ; AVX1-LABEL: shuffle_v2i64_22:
67 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[0,1,0,1]
70 ; AVX2-LABEL: shuffle_v2i64_22:
72 ; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
75 ; AVX512VL-LABEL: shuffle_v2i64_22:
77 ; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
79 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
80 ret <2 x i64> %shuffle
; Element swap of the second operand: mask <3, 2> yields <%b[1], %b[0]>.
; %a is never selected.
82 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
83 ; SSE-LABEL: shuffle_v2i64_32:
85 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
88 ; AVX-LABEL: shuffle_v2i64_32:
90 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[2,3,0,1]
92 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
93 ret <2 x i64> %shuffle
; Splat of the second operand's high element: mask <3, 3> yields
; <%b[1], %b[1]>. %a is never selected.
95 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
96 ; SSE-LABEL: shuffle_v2i64_33:
98 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
101 ; AVX-LABEL: shuffle_v2i64_33:
103 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[2,3,2,3]
105 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
106 ret <2 x i64> %shuffle
; Floating-point splat of the low element: mask <0, 0> yields <%a[0], %a[0]>.
; The plain-SSE2 run expects movlhps; every run with SSE3 or later expects
; (v)movddup.
109 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
110 ; SSE2-LABEL: shuffle_v2f64_00:
112 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
115 ; SSE3-LABEL: shuffle_v2f64_00:
117 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
120 ; SSSE3-LABEL: shuffle_v2f64_00:
122 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
125 ; SSE41-LABEL: shuffle_v2f64_00:
127 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
130 ; AVX-LABEL: shuffle_v2f64_00:
132 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
134 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
135 ret <2 x double> %shuffle
; Floating-point element swap: mask <1, 0> yields <%a[1], %a[0]>.
; %b is never selected.
137 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
138 ; SSE-LABEL: shuffle_v2f64_10:
140 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
143 ; AVX-LABEL: shuffle_v2f64_10:
145 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
148 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
149 ret <2 x double> %shuffle
; Floating-point splat of the high element: mask <1, 1> yields
; <%a[1], %a[1]>. %b is never selected.
151 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
152 ; SSE-LABEL: shuffle_v2f64_11:
154 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
157 ; AVX-LABEL: shuffle_v2f64_11:
159 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
161 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
162 ret <2 x double> %shuffle
; Floating-point splat from the second operand: mask <2, 2> yields
; <%b[0], %b[0]>. The SSE2 expectation first copies xmm1 into xmm0 and then
; uses movlhps; later feature levels expect a single (v)movddup from xmm1.
164 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
165 ; SSE2-LABEL: shuffle_v2f64_22:
167 ; SSE2-NEXT: movaps %xmm1, %xmm0
168 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
171 ; SSE3-LABEL: shuffle_v2f64_22:
173 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
176 ; SSSE3-LABEL: shuffle_v2f64_22:
178 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
181 ; SSE41-LABEL: shuffle_v2f64_22:
183 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
186 ; AVX-LABEL: shuffle_v2f64_22:
188 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
190 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
191 ret <2 x double> %shuffle
; Floating-point element swap of the second operand: mask <3, 2> yields
; <%b[1], %b[0]>. %a is never selected.
193 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
194 ; SSE-LABEL: shuffle_v2f64_32:
196 ; SSE-NEXT: movaps %xmm1, %xmm0
197 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
200 ; AVX-LABEL: shuffle_v2f64_32:
202 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
205 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
206 ret <2 x double> %shuffle
; Floating-point splat of the second operand's high element: mask <3, 3>
; yields <%b[1], %b[1]>. %a is never selected.
208 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
209 ; SSE-LABEL: shuffle_v2f64_33:
211 ; SSE-NEXT: movaps %xmm1, %xmm0
212 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
215 ; AVX-LABEL: shuffle_v2f64_33:
217 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,1]
219 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
220 ret <2 x double> %shuffle
; Two-input blend that keeps each element in its own lane: mask <0, 3> yields
; <%a[0], %b[1]>. Runs without SSE4.1 expect shufps; SSE4.1 and AVX expect
; (v)blendps.
222 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
223 ; SSE2-LABEL: shuffle_v2f64_03:
225 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
228 ; SSE3-LABEL: shuffle_v2f64_03:
230 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
233 ; SSSE3-LABEL: shuffle_v2f64_03:
235 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
238 ; SSE41-LABEL: shuffle_v2f64_03:
240 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
243 ; AVX-LABEL: shuffle_v2f64_03:
245 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
247 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
248 ret <2 x double> %shuffle
; Two-input blend with the operands' roles reversed: mask <2, 1> yields
; <%b[0], %a[1]>. Runs without SSE4.1 expect movsd; SSE4.1 and AVX expect
; (v)blendps.
250 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
251 ; SSE2-LABEL: shuffle_v2f64_21:
253 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
256 ; SSE3-LABEL: shuffle_v2f64_21:
258 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
261 ; SSSE3-LABEL: shuffle_v2f64_21:
263 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
266 ; SSE41-LABEL: shuffle_v2f64_21:
268 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
271 ; AVX-LABEL: shuffle_v2f64_21:
273 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
275 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
276 ret <2 x double> %shuffle
; Mask <undef, 2>: lane 1 must be %b[0]; lane 0 is undefined, so any value is
; acceptable there. The expected code fills the free lane differently per
; feature level (movlhps keeps xmm0[0]; (v)movddup duplicates xmm1[0]).
278 define <2 x double> @shuffle_v2f64_u2(<2 x double> %a, <2 x double> %b) {
279 ; SSE2-LABEL: shuffle_v2f64_u2:
281 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
284 ; SSE3-LABEL: shuffle_v2f64_u2:
286 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
289 ; SSSE3-LABEL: shuffle_v2f64_u2:
291 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
294 ; SSE41-LABEL: shuffle_v2f64_u2:
296 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
299 ; AVX-LABEL: shuffle_v2f64_u2:
301 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
303 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 undef, i32 2>
304 ret <2 x double> %shuffle
; Mask <3, undef>: lane 0 must be %b[1]; lane 1 is undefined, so any value is
; acceptable there (SSE expects %b[1] again, AVX expects %b[0]).
306 define <2 x double> @shuffle_v2f64_3u(<2 x double> %a, <2 x double> %b) {
307 ; SSE-LABEL: shuffle_v2f64_3u:
309 ; SSE-NEXT: movaps %xmm1, %xmm0
310 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
313 ; AVX-LABEL: shuffle_v2f64_3u:
315 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
317 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 undef>
318 ret <2 x double> %shuffle
; Interleave of the two low elements: mask <0, 2> yields <%a[0], %b[0]>.
321 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
322 ; SSE-LABEL: shuffle_v2i64_02:
324 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
327 ; AVX-LABEL: shuffle_v2i64_02:
329 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
331 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
332 ret <2 x i64> %shuffle
; Same <0, 2> mask as shuffle_v2i64_02, but with an extra leading argument
; %nonce that the body never references. The expected code reads its inputs
; from xmm1/xmm2 and the SSE sequence needs a register copy into xmm0.
; NOTE(review): %nonce presumably exists to keep the inputs out of the return
; register - confirm.
334 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
335 ; SSE-LABEL: shuffle_v2i64_02_copy:
337 ; SSE-NEXT: movaps %xmm1, %xmm0
338 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
341 ; AVX-LABEL: shuffle_v2i64_02_copy:
343 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm2[0]
345 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
346 ret <2 x i64> %shuffle
; Integer blend that keeps each element in its own lane: mask <0, 3> yields
; <%a[0], %b[1]>. Runs without SSE4.1 expect shufps; SSE4.1 and AVX expect
; (v)blendps.
348 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
349 ; SSE2-LABEL: shuffle_v2i64_03:
351 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
354 ; SSE3-LABEL: shuffle_v2i64_03:
356 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
359 ; SSSE3-LABEL: shuffle_v2i64_03:
361 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
364 ; SSE41-LABEL: shuffle_v2i64_03:
366 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
369 ; AVX-LABEL: shuffle_v2i64_03:
371 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
373 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
374 ret <2 x i64> %shuffle
; Same <0, 3> mask as shuffle_v2i64_03, with an unused leading argument
; %nonce. The expected code reads its inputs from xmm1/xmm2, and every SSE
; variant begins with a register copy into xmm0.
; NOTE(review): %nonce presumably exists to keep the inputs out of the return
; register - confirm.
376 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
377 ; SSE2-LABEL: shuffle_v2i64_03_copy:
379 ; SSE2-NEXT: movaps %xmm1, %xmm0
380 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
383 ; SSE3-LABEL: shuffle_v2i64_03_copy:
385 ; SSE3-NEXT: movaps %xmm1, %xmm0
386 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
389 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
391 ; SSSE3-NEXT: movaps %xmm1, %xmm0
392 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
395 ; SSE41-LABEL: shuffle_v2i64_03_copy:
397 ; SSE41-NEXT: movaps %xmm1, %xmm0
398 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
401 ; AVX-LABEL: shuffle_v2i64_03_copy:
403 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
405 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
406 ret <2 x i64> %shuffle
; Mask <1, 2> yields <%a[1], %b[0]>, i.e. the middle 128 bits of the
; concatenation of %a and %b. SSE2/SSE3 expect shufps; SSSE3 and later
; expect (v)palignr.
408 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
409 ; SSE2-LABEL: shuffle_v2i64_12:
411 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
414 ; SSE3-LABEL: shuffle_v2i64_12:
416 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
419 ; SSSE3-LABEL: shuffle_v2i64_12:
421 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
422 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
425 ; SSE41-LABEL: shuffle_v2i64_12:
427 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
428 ; SSE41-NEXT: movdqa %xmm1, %xmm0
431 ; AVX-LABEL: shuffle_v2i64_12:
433 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
435 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
436 ret <2 x i64> %shuffle
; Same <1, 2> mask as shuffle_v2i64_12, with an unused leading argument
; %nonce. The expected code reads its inputs from xmm1/xmm2, and every SSE
; variant begins with a register copy into xmm0.
; NOTE(review): %nonce presumably exists to keep the inputs out of the return
; register - confirm.
438 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
439 ; SSE2-LABEL: shuffle_v2i64_12_copy:
441 ; SSE2-NEXT: movaps %xmm1, %xmm0
442 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm2[0,1]
445 ; SSE3-LABEL: shuffle_v2i64_12_copy:
447 ; SSE3-NEXT: movaps %xmm1, %xmm0
448 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm2[0,1]
451 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
453 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
454 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
457 ; SSE41-LABEL: shuffle_v2i64_12_copy:
459 ; SSE41-NEXT: movdqa %xmm2, %xmm0
460 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
463 ; AVX-LABEL: shuffle_v2i64_12_copy:
465 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
467 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
468 ret <2 x i64> %shuffle
; Interleave of the two high elements: mask <1, 3> yields <%a[1], %b[1]>.
470 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
471 ; SSE-LABEL: shuffle_v2i64_13:
473 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
476 ; AVX-LABEL: shuffle_v2i64_13:
478 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
480 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
481 ret <2 x i64> %shuffle
; Same <1, 3> mask as shuffle_v2i64_13, with an unused leading argument
; %nonce. The expected code reads its inputs from xmm1/xmm2 and the SSE
; sequence needs a register copy into xmm0.
; NOTE(review): %nonce presumably exists to keep the inputs out of the return
; register - confirm.
483 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
484 ; SSE-LABEL: shuffle_v2i64_13_copy:
486 ; SSE-NEXT: movaps %xmm1, %xmm0
487 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
490 ; AVX-LABEL: shuffle_v2i64_13_copy:
492 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm2[1]
494 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
495 ret <2 x i64> %shuffle
; Low-element interleave with the operands commuted: mask <2, 0> yields
; <%b[0], %a[0]>. The SSE expectation builds the result in xmm1 and then
; copies it to xmm0.
497 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
498 ; SSE-LABEL: shuffle_v2i64_20:
500 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
501 ; SSE-NEXT: movaps %xmm1, %xmm0
504 ; AVX-LABEL: shuffle_v2i64_20:
506 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
508 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
509 ret <2 x i64> %shuffle
; Same <2, 0> mask as shuffle_v2i64_20, with an unused leading argument
; %nonce. The expected code reads its inputs from xmm1/xmm2 and the SSE
; sequence starts by copying xmm2 into xmm0.
; NOTE(review): %nonce presumably exists to keep the inputs out of the return
; register - confirm.
511 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
512 ; SSE-LABEL: shuffle_v2i64_20_copy:
514 ; SSE-NEXT: movaps %xmm2, %xmm0
515 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
518 ; AVX-LABEL: shuffle_v2i64_20_copy:
520 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm1[0]
522 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
523 ret <2 x i64> %shuffle
; Integer blend with the operands' roles reversed: mask <2, 1> yields
; <%b[0], %a[1]>. Runs without SSE4.1 expect movsd; SSE4.1 and AVX expect
; (v)blendps.
525 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
526 ; SSE2-LABEL: shuffle_v2i64_21:
528 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
531 ; SSE3-LABEL: shuffle_v2i64_21:
533 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
536 ; SSSE3-LABEL: shuffle_v2i64_21:
538 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
541 ; SSE41-LABEL: shuffle_v2i64_21:
543 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
546 ; AVX-LABEL: shuffle_v2i64_21:
548 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
550 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
551 ret <2 x i64> %shuffle
; Same <2, 1> mask as shuffle_v2i64_21, with an unused leading argument
; %nonce. The expected code reads its inputs from xmm1/xmm2; note that the
; leading copy is movapd before SSE4.1 and movaps for SSE4.1.
; NOTE(review): %nonce presumably exists to keep the inputs out of the return
; register - confirm.
553 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
554 ; SSE2-LABEL: shuffle_v2i64_21_copy:
556 ; SSE2-NEXT: movapd %xmm1, %xmm0
557 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
560 ; SSE3-LABEL: shuffle_v2i64_21_copy:
562 ; SSE3-NEXT: movapd %xmm1, %xmm0
563 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
566 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
568 ; SSSE3-NEXT: movapd %xmm1, %xmm0
569 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
572 ; SSE41-LABEL: shuffle_v2i64_21_copy:
574 ; SSE41-NEXT: movaps %xmm1, %xmm0
575 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
578 ; AVX-LABEL: shuffle_v2i64_21_copy:
580 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
582 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
583 ret <2 x i64> %shuffle
; Mask <3, 0> yields <%b[1], %a[0]>, i.e. the middle 128 bits of the
; concatenation of %b and %a. SSE2/SSE3 expect shufps into xmm1 followed by a
; copy; SSSE3 and later expect a single (v)palignr.
585 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
586 ; SSE2-LABEL: shuffle_v2i64_30:
588 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
589 ; SSE2-NEXT: movaps %xmm1, %xmm0
592 ; SSE3-LABEL: shuffle_v2i64_30:
594 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
595 ; SSE3-NEXT: movaps %xmm1, %xmm0
598 ; SSSE3-LABEL: shuffle_v2i64_30:
600 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
603 ; SSE41-LABEL: shuffle_v2i64_30:
605 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
608 ; AVX-LABEL: shuffle_v2i64_30:
610 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
612 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
613 ret <2 x i64> %shuffle
; Same <3, 0> mask as shuffle_v2i64_30, with an unused leading argument
; %nonce. The expected code reads its inputs from xmm1/xmm2, and every SSE
; variant begins with a register copy into xmm0.
; NOTE(review): %nonce presumably exists to keep the inputs out of the return
; register - confirm.
615 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
616 ; SSE2-LABEL: shuffle_v2i64_30_copy:
618 ; SSE2-NEXT: movaps %xmm2, %xmm0
619 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
622 ; SSE3-LABEL: shuffle_v2i64_30_copy:
624 ; SSE3-NEXT: movaps %xmm2, %xmm0
625 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
628 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
630 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
631 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
634 ; SSE41-LABEL: shuffle_v2i64_30_copy:
636 ; SSE41-NEXT: movdqa %xmm1, %xmm0
637 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
640 ; AVX-LABEL: shuffle_v2i64_30_copy:
642 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
644 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
645 ret <2 x i64> %shuffle
; High-element interleave with the operands commuted: mask <3, 1> yields
; <%b[1], %a[1]>. The SSE expectation builds the result in xmm1 and then
; copies it to xmm0.
647 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
648 ; SSE-LABEL: shuffle_v2i64_31:
650 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
651 ; SSE-NEXT: movaps %xmm1, %xmm0
654 ; AVX-LABEL: shuffle_v2i64_31:
656 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
658 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
659 ret <2 x i64> %shuffle
; Same <3, 1> mask as shuffle_v2i64_31, with an unused leading argument
; %nonce. The expected code reads its inputs from xmm1/xmm2 and the SSE
; sequence starts by copying xmm2 into xmm0.
; NOTE(review): %nonce presumably exists to keep the inputs out of the return
; register - confirm.
661 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
662 ; SSE-LABEL: shuffle_v2i64_31_copy:
664 ; SSE-NEXT: movaps %xmm2, %xmm0
665 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
668 ; AVX-LABEL: shuffle_v2i64_31_copy:
670 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm2[1],xmm1[1]
672 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
673 ret <2 x i64> %shuffle
; Shuffle against an all-zero second operand: mask <0, 3> yields
; <%a[0], 0>, i.e. the high element is cleared. Expected as a single (v)movq.
676 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
677 ; SSE-LABEL: shuffle_v2i64_0z:
679 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
682 ; AVX-LABEL: shuffle_v2i64_0z:
684 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
686 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
687 ret <2 x i64> %shuffle
; Shuffle against an all-zero second operand: mask <1, 3> yields
; <%a[1], 0>. Expected as a byte shift right by 8 ((v)psrldq).
690 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
691 ; SSE-LABEL: shuffle_v2i64_1z:
693 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
696 ; AVX-LABEL: shuffle_v2i64_1z:
698 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
700 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
701 ret <2 x i64> %shuffle
; Shuffle against an all-zero second operand: mask <2, 0> yields
; <0, %a[0]>. Expected as a byte shift left by 8 ((v)pslldq).
704 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
705 ; SSE-LABEL: shuffle_v2i64_z0:
707 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
710 ; AVX-LABEL: shuffle_v2i64_z0:
712 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
714 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
715 ret <2 x i64> %shuffle
; Shuffle against an all-zero second operand: mask <2, 1> yields
; <0, %a[1]>, i.e. the low element is cleared. Every expectation first
; materializes a zero register with (v)xorps and then combines it with xmm0.
718 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
719 ; SSE2-LABEL: shuffle_v2i64_z1:
721 ; SSE2-NEXT: xorps %xmm1, %xmm1
722 ; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
723 ; SSE2-NEXT: movaps %xmm1, %xmm0
726 ; SSE3-LABEL: shuffle_v2i64_z1:
728 ; SSE3-NEXT: xorps %xmm1, %xmm1
729 ; SSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
730 ; SSE3-NEXT: movaps %xmm1, %xmm0
733 ; SSSE3-LABEL: shuffle_v2i64_z1:
735 ; SSSE3-NEXT: xorps %xmm1, %xmm1
736 ; SSSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
737 ; SSSE3-NEXT: movaps %xmm1, %xmm0
740 ; SSE41-LABEL: shuffle_v2i64_z1:
742 ; SSE41-NEXT: xorps %xmm1, %xmm1
743 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
746 ; AVX-LABEL: shuffle_v2i64_z1:
748 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
749 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
751 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
752 ret <2 x i64> %shuffle
; Floating-point shuffle against an all-zero second operand: mask <0, 3>
; yields <%a[0], 0.0>. Expected as a single (v)movq.
755 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
756 ; SSE-LABEL: shuffle_v2f64_0z:
758 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
761 ; AVX-LABEL: shuffle_v2f64_0z:
763 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
765 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
766 ret <2 x double> %shuffle
; Floating-point shuffle against an all-zero second operand: mask <1, 3>
; yields <%a[1], 0.0>. Expected as a zeroed register plus (v)unpckhpd.
769 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
770 ; SSE-LABEL: shuffle_v2f64_1z:
772 ; SSE-NEXT: xorps %xmm1, %xmm1
773 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
776 ; AVX-LABEL: shuffle_v2f64_1z:
778 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
779 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
781 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
782 ret <2 x double> %shuffle
; Floating-point shuffle against an all-zero second operand: mask <2, 0>
; yields <0.0, %a[0]>. Expected as a zeroed register plus (v)movlhps.
785 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
786 ; SSE-LABEL: shuffle_v2f64_z0:
788 ; SSE-NEXT: xorps %xmm1, %xmm1
789 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
790 ; SSE-NEXT: movaps %xmm1, %xmm0
793 ; AVX-LABEL: shuffle_v2f64_z0:
795 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
796 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
798 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
799 ret <2 x double> %shuffle
; Floating-point shuffle against an all-zero second operand: mask <2, 1>
; yields <0.0, %a[1]>. Runs without SSE4.1 expect xorpd + movsd; SSE4.1 and
; AVX expect (v)xorps + (v)blendps.
802 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
803 ; SSE2-LABEL: shuffle_v2f64_z1:
805 ; SSE2-NEXT: xorpd %xmm1, %xmm1
806 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
809 ; SSE3-LABEL: shuffle_v2f64_z1:
811 ; SSE3-NEXT: xorpd %xmm1, %xmm1
812 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
815 ; SSSE3-LABEL: shuffle_v2f64_z1:
817 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
818 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
821 ; SSE41-LABEL: shuffle_v2f64_z1:
823 ; SSE41-NEXT: xorps %xmm1, %xmm1
824 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
827 ; AVX-LABEL: shuffle_v2f64_z1:
829 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
830 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
832 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
833 ret <2 x double> %shuffle
; Two shuffles separated by bitcasts that together equal one <1, z> shuffle:
;   1. %shuffle64 (mask <2, 1>) builds <0.0, %a[1]>.
;   2. After the bitcast to <4 x float>, mask <2, 3, 0, 1> swaps the two
;      64-bit halves, so the final value is <%a[1], 0.0>.
; The expected code is the same xorps + unpckhpd pair as shuffle_v2f64_1z.
836 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
837 ; SSE-LABEL: shuffle_v2f64_bitcast_1z:
839 ; SSE-NEXT: xorps %xmm1, %xmm1
840 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
843 ; AVX-LABEL: shuffle_v2f64_bitcast_1z:
845 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
846 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
848 %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
849 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
850 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
851 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
852 ret <2 x double> %bitcast64
; Shuffle followed by a mask that together zero the lowest 32-bit element:
;   1. %x is viewed as <4 x float>; mask <4, 1, 2, 3> replaces element 0 with
;      element 0 of the constant vector (1.0) and keeps elements 1..3 of %x.
;   2. Back as <2 x i64>, the `and` with <-4294967296, -1>
;      (0xFFFFFFFF00000000, all-ones) clears the low 32 bits of the first
;      i64 - exactly the bits that held the inserted 1.0 - and leaves
;      everything else untouched.
; The expected code therefore only has to zero 32-bit element 0: a
; constant-pool andps before SSE4.1, and (v)xorps + (v)blendps from SSE4.1 on.
855 define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
856 ; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
858 ; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
861 ; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
863 ; SSE3-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
866 ; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
868 ; SSSE3-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
871 ; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
873 ; SSE41-NEXT: xorps %xmm1, %xmm1
874 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
877 ; AVX-LABEL: shuffle_v2i64_bitcast_z123:
879 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
880 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
882 %bitcast32 = bitcast <2 x i64> %x to <4 x float>
883 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
884 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
885 %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
; Return the masked value: a basic block must end in a terminator, and
; without this `ret` the result %and would never be used.
886 ret <2 x i64> %and
; Scalar i64 from a register placed in lane 0 with lane 1 zeroed: the
; insertelement puts %a in element 0 of an undef vector, and mask <0, 3>
; takes the high element from zeroinitializer. Expected as a single (v)movq
; from %rdi.
889 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
890 ; SSE-LABEL: insert_reg_and_zero_v2i64:
892 ; SSE-NEXT: movq %rdi, %xmm0
895 ; AVX-LABEL: insert_reg_and_zero_v2i64:
897 ; AVX-NEXT: vmovq %rdi, %xmm0
899 %v = insertelement <2 x i64> undef, i64 %a, i32 0
900 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
901 ret <2 x i64> %shuffle
; Same <%a, 0> construction as insert_reg_and_zero_v2i64, but the scalar is
; loaded from %ptr. The load is expected to fold into a single zero-extending
; (v)movsd from memory.
904 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
905 ; SSE-LABEL: insert_mem_and_zero_v2i64:
907 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
910 ; AVX-LABEL: insert_mem_and_zero_v2i64:
912 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
914 %a = load i64, i64* %ptr
915 %v = insertelement <2 x i64> undef, i64 %a, i32 0
916 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
917 ret <2 x i64> %shuffle
; Scalar double placed in lane 0 with lane 1 zeroed: insertelement into an
; undef vector, then mask <0, 3> against zeroinitializer. Expected as a
; single (v)movq that keeps xmm0[0] and zeroes the upper half.
920 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
921 ; SSE-LABEL: insert_reg_and_zero_v2f64:
923 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
926 ; AVX-LABEL: insert_reg_and_zero_v2f64:
928 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
930 %v = insertelement <2 x double> undef, double %a, i32 0
931 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
932 ret <2 x double> %shuffle
; Same <%a, 0.0> construction as insert_reg_and_zero_v2f64, but the scalar is
; loaded from %ptr. The load is expected to fold into a single zero-extending
; (v)movsd from memory.
935 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
936 ; SSE-LABEL: insert_mem_and_zero_v2f64:
938 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
941 ; AVX-LABEL: insert_mem_and_zero_v2f64:
943 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
945 %a = load double, double* %ptr
946 %v = insertelement <2 x double> undef, double %a, i32 0
947 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
948 ret <2 x double> %shuffle
; Replace the low element of %b with the scalar %a: %v holds %a in element 0,
; and mask <0, 3> yields <%a, %b[1]>. Runs without SSE4.1 expect movq + movsd;
; SSE4.1 and AVX expect a single (v)pinsrq $0.
951 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
952 ; SSE2-LABEL: insert_reg_lo_v2i64:
954 ; SSE2-NEXT: movq %rdi, %xmm1
955 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
958 ; SSE3-LABEL: insert_reg_lo_v2i64:
960 ; SSE3-NEXT: movq %rdi, %xmm1
961 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
964 ; SSSE3-LABEL: insert_reg_lo_v2i64:
966 ; SSSE3-NEXT: movq %rdi, %xmm1
967 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
970 ; SSE41-LABEL: insert_reg_lo_v2i64:
972 ; SSE41-NEXT: pinsrq $0, %rdi, %xmm0
975 ; AVX-LABEL: insert_reg_lo_v2i64:
977 ; AVX-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm0
979 %v = insertelement <2 x i64> undef, i64 %a, i32 0
980 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
981 ret <2 x i64> %shuffle
; Replace the low element of %b with an i64 loaded from %ptr: mask <0, 3>
; yields <load, %b[1]>. The load is expected to fold into movlps before
; SSE4.1 and into (v)pinsrq $0 with a memory operand from SSE4.1 on.
984 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
985 ; SSE2-LABEL: insert_mem_lo_v2i64:
987 ; SSE2-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
990 ; SSE3-LABEL: insert_mem_lo_v2i64:
992 ; SSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
995 ; SSSE3-LABEL: insert_mem_lo_v2i64:
997 ; SSSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1000 ; SSE41-LABEL: insert_mem_lo_v2i64:
1002 ; SSE41-NEXT: pinsrq $0, (%rdi), %xmm0
1005 ; AVX-LABEL: insert_mem_lo_v2i64:
1007 ; AVX-NEXT: vpinsrq $0, (%rdi), %xmm0, %xmm0
1009 %a = load i64, i64* %ptr
1010 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1011 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1012 ret <2 x i64> %shuffle
; Place the scalar %a in the high lane next to the low element of %b: mask
; <2, 0> yields <%b[0], %a>. Runs without SSE4.1 expect movq + punpcklqdq;
; SSE4.1 and AVX expect a single (v)pinsrq $1.
1015 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
1016 ; SSE2-LABEL: insert_reg_hi_v2i64:
1018 ; SSE2-NEXT: movq %rdi, %xmm1
1019 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1022 ; SSE3-LABEL: insert_reg_hi_v2i64:
1024 ; SSE3-NEXT: movq %rdi, %xmm1
1025 ; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1028 ; SSSE3-LABEL: insert_reg_hi_v2i64:
1030 ; SSSE3-NEXT: movq %rdi, %xmm1
1031 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1034 ; SSE41-LABEL: insert_reg_hi_v2i64:
1036 ; SSE41-NEXT: pinsrq $1, %rdi, %xmm0
1039 ; AVX-LABEL: insert_reg_hi_v2i64:
1041 ; AVX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
1043 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1044 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1045 ret <2 x i64> %shuffle
; Place an i64 loaded from %ptr in the high lane next to the low element of
; %b: mask <2, 0> yields <%b[0], load>. Runs without SSE4.1 expect a movsd
; load followed by movlhps; SSE4.1 and AVX expect (v)pinsrq $1 with a memory
; operand.
1048 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
1049 ; SSE2-LABEL: insert_mem_hi_v2i64:
1051 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
1052 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1055 ; SSE3-LABEL: insert_mem_hi_v2i64:
1057 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
1058 ; SSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1061 ; SSSE3-LABEL: insert_mem_hi_v2i64:
1063 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
1064 ; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1067 ; SSE41-LABEL: insert_mem_hi_v2i64:
1069 ; SSE41-NEXT: pinsrq $1, (%rdi), %xmm0
1072 ; AVX-LABEL: insert_mem_hi_v2i64:
1074 ; AVX-NEXT: vpinsrq $1, (%rdi), %xmm0, %xmm0
1076 %a = load i64, i64* %ptr
1077 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1078 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1079 ret <2 x i64> %shuffle
; Replace the low element of %b with the scalar double %a: mask <0, 3> yields
; <%a, %b[1]>. Runs without SSE4.1 expect shufps; SSE4.1 and AVX expect
; (v)blendps. In the expected code %a is in xmm0 and %b is in xmm1.
1082 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
1083 ; SSE2-LABEL: insert_reg_lo_v2f64:
1085 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1088 ; SSE3-LABEL: insert_reg_lo_v2f64:
1090 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1093 ; SSSE3-LABEL: insert_reg_lo_v2f64:
1095 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1098 ; SSE41-LABEL: insert_reg_lo_v2f64:
1100 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1103 ; AVX-LABEL: insert_reg_lo_v2f64:
1105 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1107 %v = insertelement <2 x double> undef, double %a, i32 0
1108 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1109 ret <2 x double> %shuffle
; Replace the low element of %b with a double loaded from %ptr: mask <0, 3>
; yields <load, %b[1]>. The load is expected to fold into a single (v)movlps.
1112 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
1113 ; SSE-LABEL: insert_mem_lo_v2f64:
1115 ; SSE-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1118 ; AVX-LABEL: insert_mem_lo_v2f64:
1120 ; AVX-NEXT: vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1122 %a = load double, double* %ptr
1123 %v = insertelement <2 x double> undef, double %a, i32 0
1124 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1125 ret <2 x double> %shuffle
; Place the scalar double %a in the high lane next to the low element of %b:
; mask <2, 0> yields <%b[0], %a>. Expected as (v)movlhps; the SSE form builds
; the result in xmm1 and then copies it to xmm0.
1128 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
1129 ; SSE-LABEL: insert_reg_hi_v2f64:
1131 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1132 ; SSE-NEXT: movaps %xmm1, %xmm0
1135 ; AVX-LABEL: insert_reg_hi_v2f64:
1137 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1139 %v = insertelement <2 x double> undef, double %a, i32 0
1140 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1141 ret <2 x double> %shuffle
; Place a double loaded from %ptr in the high lane next to the low element of
; %b: mask <2, 0> yields <%b[0], load>. The load is expected to fold into a
; single (v)movhps.
1144 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
1145 ; SSE-LABEL: insert_mem_hi_v2f64:
1147 ; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
1150 ; AVX-LABEL: insert_mem_hi_v2f64:
1152 ; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
1154 %a = load double, double* %ptr
1155 %v = insertelement <2 x double> undef, double %a, i32 0
1156 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1157 ret <2 x double> %shuffle
; Broadcast a scalar double held in a register: %a is inserted at element 0
; and mask <0, 0> duplicates it into both lanes. The plain-SSE2 run expects
; movlhps; SSE3 and later expect (v)movddup.
1160 define <2 x double> @insert_dup_reg_v2f64(double %a) {
1161 ; SSE2-LABEL: insert_dup_reg_v2f64:
1163 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1166 ; SSE3-LABEL: insert_dup_reg_v2f64:
1168 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1171 ; SSSE3-LABEL: insert_dup_reg_v2f64:
1173 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1176 ; SSE41-LABEL: insert_dup_reg_v2f64:
1178 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1181 ; AVX-LABEL: insert_dup_reg_v2f64:
1183 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1185 %v = insertelement <2 x double> undef, double %a, i32 0
1186 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1187 ret <2 x double> %shuffle
; Load a double from %ptr and splat it into both lanes of a <2 x double>.
; The baseline SSE2 run is expected to load with movsd and then duplicate with
; movlhps; the SSE3-and-later runs and the AVX runs are expected to fold the
; load into a single movddup/vmovddup with a memory operand.
1190 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
1191 ; SSE2-LABEL: insert_dup_mem_v2f64:
1193 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1194 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1197 ; SSE3-LABEL: insert_dup_mem_v2f64:
1199 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1202 ; SSSE3-LABEL: insert_dup_mem_v2f64:
1204 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1207 ; SSE41-LABEL: insert_dup_mem_v2f64:
1209 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1212 ; AVX-LABEL: insert_dup_mem_v2f64:
1214 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1216 %a = load double, double* %ptr
1217 %v = insertelement <2 x double> undef, double %a, i32 0
1218 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1219 ret <2 x double> %shuffle
; Load a whole <2 x double> from %ptr and splat its lane 0 into both lanes.
; Unlike insert_dup_mem_v2f64 the IR load here is a full vector load, not a
; scalar load. The baseline SSE2 run is expected to do a movaps load followed by
; movlhps; the other runs are expected to use movddup/vmovddup with a memory
; operand.
1222 define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
1223 ; SSE2-LABEL: insert_dup_mem128_v2f64:
1225 ; SSE2-NEXT: movaps (%rdi), %xmm0
1226 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1229 ; SSE3-LABEL: insert_dup_mem128_v2f64:
1231 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1234 ; SSSE3-LABEL: insert_dup_mem128_v2f64:
1236 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1239 ; SSE41-LABEL: insert_dup_mem128_v2f64:
1241 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1244 ; AVX-LABEL: insert_dup_mem128_v2f64:
1246 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1248 %v = load <2 x double>, <2 x double>* %ptr
1249 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1250 ret <2 x double> %shuffle
; Load an i64 from %ptr (align 1) and splat it into both lanes of a <2 x i64>.
; The zeroinitializer mask selects lane 0 of %tmp1 for every result lane.
; Non-AVX runs are expected to load with movq and duplicate with pshufd; the
; AVX runs are expected to use a single vmovddup with a memory operand.
1254 define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
1255 ; SSE-LABEL: insert_dup_mem_v2i64:
1257 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1258 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1261 ; AVX-LABEL: insert_dup_mem_v2i64:
1263 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1265 %tmp = load i64, i64* %ptr, align 1
1266 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
1267 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
  ; Every basic block must end in a terminator; return the splatted vector.
  ret <2 x i64> %tmp2
; Load a <2 x double> from %ptr and swap its two lanes (mask <1, 0>).
; Non-AVX runs are expected to load with movaps and swap with shufps; the AVX
; runs are expected to fold the load into vpermilpd.
1271 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
1272 ; SSE-LABEL: shuffle_mem_v2f64_10:
1274 ; SSE-NEXT: movaps (%rdi), %xmm0
1275 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
1278 ; AVX-LABEL: shuffle_mem_v2f64_10:
1280 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1283 %a = load <2 x double>, <2 x double>* %ptr
1284 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1285 ret <2 x double> %shuffle
; Two-input shuffle where the second operand is loaded from memory.
; Mask <3, 1> takes lane 1 of the loaded vector %c as the result's low lane and
; lane 1 of %a as the high lane. The checks expect a single movlps/vmovlps with
; a memory operand.
1288 define <2 x double> @shuffle_mem_v2f64_31(<2 x double> %a, <2 x double>* %b) {
1289 ; SSE-LABEL: shuffle_mem_v2f64_31:
1291 ; SSE-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1294 ; AVX-LABEL: shuffle_mem_v2f64_31:
1296 ; AVX-NEXT: vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1298 %c = load <2 x double>, <2 x double>* %b
1299 %f = shufflevector <2 x double> %a, <2 x double> %c, <2 x i32> <i32 3, i32 1>
  ; Every basic block must end in a terminator; return the shuffled vector.
  ret <2 x double> %f
; Combine lane 0 of %a with lane 0 of a vector loaded from %pb (mask <0, 2>).
; The IR load is marked align 1. Non-AVX runs are expected to use movhps with a
; memory operand; the AVX runs are expected to use vunpcklpd with a memory
; operand.
1303 define <2 x double> @shuffle_mem_v2f64_02(<2 x double> %a, <2 x double>* %pb) {
1304 ; SSE-LABEL: shuffle_mem_v2f64_02:
1306 ; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
1309 ; AVX-LABEL: shuffle_mem_v2f64_02:
1311 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1313 %b = load <2 x double>, <2 x double>* %pb, align 1
1314 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
1315 ret <2 x double> %shuffle
; Blend lane 0 of a vector loaded from %pb with lane 1 of %a (mask <2, 1>).
; The IR load is marked align 1. The SSE2/SSE3/SSSE3 runs are expected to use
; movlps with a memory operand; the SSE4.1 run is expected to load with movups
; and then blendps; the AVX runs are expected to use vblendps with a memory
; operand.
1318 define <2 x double> @shuffle_mem_v2f64_21(<2 x double> %a, <2 x double>* %pb) {
1319 ; SSE2-LABEL: shuffle_mem_v2f64_21:
1321 ; SSE2-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1324 ; SSE3-LABEL: shuffle_mem_v2f64_21:
1326 ; SSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1329 ; SSSE3-LABEL: shuffle_mem_v2f64_21:
1331 ; SSSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1334 ; SSE41-LABEL: shuffle_mem_v2f64_21:
1336 ; SSE41-NEXT: movups (%rdi), %xmm1
1337 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1340 ; AVX-LABEL: shuffle_mem_v2f64_21:
1342 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1344 %b = load <2 x double>, <2 x double>* %pb, align 1
1345 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
1346 ret <2 x double> %shuffle