1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL
; Mask <0, 0>: both result lanes are element 0 of %a; %b is never selected.
10 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
11 ; SSE-LABEL: shuffle_v2i64_00:
13 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
16 ; AVX1-LABEL: shuffle_v2i64_00:
18 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
21 ; AVX2-LABEL: shuffle_v2i64_00:
23 ; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
26 ; AVX512VL-LABEL: shuffle_v2i64_00:
28 ; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
30 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
31 ret <2 x i64> %shuffle
; Mask <1, 0>: swaps the two elements of %a; %b is never selected.
33 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
34 ; SSE-LABEL: shuffle_v2i64_10:
36 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
39 ; AVX-LABEL: shuffle_v2i64_10:
41 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
43 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
44 ret <2 x i64> %shuffle
; Mask <1, 1>: both result lanes are element 1 of %a; %b is never selected.
46 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
47 ; SSE-LABEL: shuffle_v2i64_11:
49 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
52 ; AVX-LABEL: shuffle_v2i64_11:
54 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
56 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
57 ret <2 x i64> %shuffle
; Mask <2, 2>: both result lanes are element 0 of %b (mask indices 2 and 3
; address the second operand); %a is never selected.
59 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
60 ; SSE-LABEL: shuffle_v2i64_22:
62 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
65 ; AVX1-LABEL: shuffle_v2i64_22:
67 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[0,1,0,1]
70 ; AVX2-LABEL: shuffle_v2i64_22:
72 ; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
75 ; AVX512VL-LABEL: shuffle_v2i64_22:
77 ; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
79 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
80 ret <2 x i64> %shuffle
; Mask <3, 2>: swaps the two elements of %b; %a is never selected.
82 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
83 ; SSE-LABEL: shuffle_v2i64_32:
85 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
88 ; AVX-LABEL: shuffle_v2i64_32:
90 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[2,3,0,1]
92 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
93 ret <2 x i64> %shuffle
; Mask <3, 3>: both result lanes are element 1 of %b; %a is never selected.
95 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
96 ; SSE-LABEL: shuffle_v2i64_33:
98 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
101 ; AVX-LABEL: shuffle_v2i64_33:
103 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[2,3,2,3]
105 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
106 ret <2 x i64> %shuffle
; Floating-point variant. Mask <0, 0>: both result lanes are element 0 of %a;
; %b is never selected.
109 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
110 ; SSE2-LABEL: shuffle_v2f64_00:
112 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
115 ; SSE3-LABEL: shuffle_v2f64_00:
117 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
120 ; SSSE3-LABEL: shuffle_v2f64_00:
122 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
125 ; SSE41-LABEL: shuffle_v2f64_00:
127 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
130 ; AVX-LABEL: shuffle_v2f64_00:
132 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
134 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
135 ret <2 x double> %shuffle
; Mask <1, 0>: swaps the two double elements of %a; %b is never selected.
137 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
138 ; SSE-LABEL: shuffle_v2f64_10:
140 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
143 ; AVX-LABEL: shuffle_v2f64_10:
145 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
148 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
149 ret <2 x double> %shuffle
; Mask <1, 1>: both result lanes are element 1 of %a; %b is never selected.
151 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
152 ; SSE-LABEL: shuffle_v2f64_11:
154 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
157 ; AVX-LABEL: shuffle_v2f64_11:
159 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
161 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
162 ret <2 x double> %shuffle
; Mask <2, 2>: both result lanes are element 0 of %b (mask indices 2 and 3
; address the second operand); %a is never selected.
164 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
165 ; SSE2-LABEL: shuffle_v2f64_22:
167 ; SSE2-NEXT: movaps %xmm1, %xmm0
168 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
171 ; SSE3-LABEL: shuffle_v2f64_22:
173 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
176 ; SSSE3-LABEL: shuffle_v2f64_22:
178 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
181 ; SSE41-LABEL: shuffle_v2f64_22:
183 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
186 ; AVX-LABEL: shuffle_v2f64_22:
188 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
190 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
191 ret <2 x double> %shuffle
; Mask <3, 2>: swaps the two double elements of %b; %a is never selected.
193 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
194 ; SSE-LABEL: shuffle_v2f64_32:
196 ; SSE-NEXT: movapd %xmm1, %xmm0
197 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
200 ; AVX-LABEL: shuffle_v2f64_32:
202 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
205 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
206 ret <2 x double> %shuffle
; Mask <3, 3>: both result lanes are element 1 of %b; %a is never selected.
208 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
209 ; SSE-LABEL: shuffle_v2f64_33:
211 ; SSE-NEXT: movaps %xmm1, %xmm0
212 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
215 ; AVX-LABEL: shuffle_v2f64_33:
217 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,1]
219 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
220 ret <2 x double> %shuffle
; Mask <0, 3>: result is <a[0], b[1]>, a two-input blend in which each element
; stays in its own lane.
222 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
223 ; SSE2-LABEL: shuffle_v2f64_03:
225 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
226 ; SSE2-NEXT: movapd %xmm1, %xmm0
229 ; SSE3-LABEL: shuffle_v2f64_03:
231 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
232 ; SSE3-NEXT: movapd %xmm1, %xmm0
235 ; SSSE3-LABEL: shuffle_v2f64_03:
237 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
238 ; SSSE3-NEXT: movapd %xmm1, %xmm0
241 ; SSE41-LABEL: shuffle_v2f64_03:
243 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
246 ; AVX-LABEL: shuffle_v2f64_03:
248 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
250 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
251 ret <2 x double> %shuffle
; Mask <2, 1>: result is <b[0], a[1]>, a two-input blend in which each element
; stays in its own lane.
253 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
254 ; SSE2-LABEL: shuffle_v2f64_21:
256 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
259 ; SSE3-LABEL: shuffle_v2f64_21:
261 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
264 ; SSSE3-LABEL: shuffle_v2f64_21:
266 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
269 ; SSE41-LABEL: shuffle_v2f64_21:
271 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
274 ; AVX-LABEL: shuffle_v2f64_21:
276 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
278 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
279 ret <2 x double> %shuffle
; Mask <undef, 2>: lane 1 is b[0]; lane 0 is undef, so the lowering is free to
; put any value there.
281 define <2 x double> @shuffle_v2f64_u2(<2 x double> %a, <2 x double> %b) {
282 ; SSE2-LABEL: shuffle_v2f64_u2:
284 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
287 ; SSE3-LABEL: shuffle_v2f64_u2:
289 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
292 ; SSSE3-LABEL: shuffle_v2f64_u2:
294 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
297 ; SSE41-LABEL: shuffle_v2f64_u2:
299 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
302 ; AVX-LABEL: shuffle_v2f64_u2:
304 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
306 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 undef, i32 2>
307 ret <2 x double> %shuffle
; Mask <3, undef>: lane 0 is b[1]; lane 1 is undef, so the lowering is free to
; put any value there.
309 define <2 x double> @shuffle_v2f64_3u(<2 x double> %a, <2 x double> %b) {
310 ; SSE-LABEL: shuffle_v2f64_3u:
312 ; SSE-NEXT: movaps %xmm1, %xmm0
313 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
316 ; AVX-LABEL: shuffle_v2f64_3u:
318 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
320 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 undef>
321 ret <2 x double> %shuffle
; Mask <0, 2>: result is <a[0], b[0]>, the low elements of both inputs.
324 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
325 ; SSE-LABEL: shuffle_v2i64_02:
327 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
330 ; AVX-LABEL: shuffle_v2i64_02:
332 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
334 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
335 ret <2 x i64> %shuffle
; Mask <0, 2> (result <a[0], b[0]>), with %a and %b passed as the second and
; third parameters. %nonce is never read in the body; the expected assembly
; takes its inputs from xmm1/xmm2 and produces the result in xmm0.
337 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
338 ; SSE-LABEL: shuffle_v2i64_02_copy:
340 ; SSE-NEXT: movaps %xmm1, %xmm0
341 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
344 ; AVX-LABEL: shuffle_v2i64_02_copy:
346 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm2[0]
348 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
349 ret <2 x i64> %shuffle
; Mask <0, 3>: result is <a[0], b[1]>, a two-input blend in which each element
; stays in its own lane.
351 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
352 ; SSE2-LABEL: shuffle_v2i64_03:
354 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
355 ; SSE2-NEXT: movapd %xmm1, %xmm0
358 ; SSE3-LABEL: shuffle_v2i64_03:
360 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
361 ; SSE3-NEXT: movapd %xmm1, %xmm0
364 ; SSSE3-LABEL: shuffle_v2i64_03:
366 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
367 ; SSSE3-NEXT: movapd %xmm1, %xmm0
370 ; SSE41-LABEL: shuffle_v2i64_03:
372 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
375 ; AVX-LABEL: shuffle_v2i64_03:
377 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
379 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
380 ret <2 x i64> %shuffle
; Mask <0, 3> (result <a[0], b[1]>), with %a and %b passed as the second and
; third parameters. %nonce is never read in the body; the expected assembly
; takes its inputs from xmm1/xmm2 and produces the result in xmm0.
382 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
383 ; SSE2-LABEL: shuffle_v2i64_03_copy:
385 ; SSE2-NEXT: movapd %xmm2, %xmm0
386 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
389 ; SSE3-LABEL: shuffle_v2i64_03_copy:
391 ; SSE3-NEXT: movapd %xmm2, %xmm0
392 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
395 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
397 ; SSSE3-NEXT: movapd %xmm2, %xmm0
398 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
401 ; SSE41-LABEL: shuffle_v2i64_03_copy:
403 ; SSE41-NEXT: movaps %xmm1, %xmm0
404 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
407 ; AVX-LABEL: shuffle_v2i64_03_copy:
409 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
411 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
412 ret <2 x i64> %shuffle
; Mask <1, 2>: result is <a[1], b[0]>, the high element of %a followed by the
; low element of %b.
414 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
415 ; SSE2-LABEL: shuffle_v2i64_12:
417 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
420 ; SSE3-LABEL: shuffle_v2i64_12:
422 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
425 ; SSSE3-LABEL: shuffle_v2i64_12:
427 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
428 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
431 ; SSE41-LABEL: shuffle_v2i64_12:
433 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
434 ; SSE41-NEXT: movdqa %xmm1, %xmm0
437 ; AVX-LABEL: shuffle_v2i64_12:
439 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
441 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
442 ret <2 x i64> %shuffle
; Mask <1, 2> (result <a[1], b[0]>), with %a and %b passed as the second and
; third parameters. %nonce is never read in the body; the expected assembly
; takes its inputs from xmm1/xmm2 and produces the result in xmm0.
444 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
445 ; SSE2-LABEL: shuffle_v2i64_12_copy:
447 ; SSE2-NEXT: movapd %xmm1, %xmm0
448 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm2[0]
451 ; SSE3-LABEL: shuffle_v2i64_12_copy:
453 ; SSE3-NEXT: movapd %xmm1, %xmm0
454 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm2[0]
457 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
459 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
460 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
463 ; SSE41-LABEL: shuffle_v2i64_12_copy:
465 ; SSE41-NEXT: movdqa %xmm2, %xmm0
466 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
469 ; AVX-LABEL: shuffle_v2i64_12_copy:
471 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
473 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
474 ret <2 x i64> %shuffle
; Mask <1, 3>: result is <a[1], b[1]>, the high elements of both inputs.
476 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
477 ; SSE-LABEL: shuffle_v2i64_13:
479 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
482 ; AVX-LABEL: shuffle_v2i64_13:
484 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
486 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
487 ret <2 x i64> %shuffle
; Mask <1, 3> (result <a[1], b[1]>), with %a and %b passed as the second and
; third parameters. %nonce is never read in the body; the expected assembly
; takes its inputs from xmm1/xmm2 and produces the result in xmm0.
489 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
490 ; SSE-LABEL: shuffle_v2i64_13_copy:
492 ; SSE-NEXT: movaps %xmm1, %xmm0
493 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
496 ; AVX-LABEL: shuffle_v2i64_13_copy:
498 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm2[1]
500 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
501 ret <2 x i64> %shuffle
; Mask <2, 0>: result is <b[0], a[0]>, the low elements of both inputs with
; %b's element first.
503 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
504 ; SSE-LABEL: shuffle_v2i64_20:
506 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
507 ; SSE-NEXT: movaps %xmm1, %xmm0
510 ; AVX-LABEL: shuffle_v2i64_20:
512 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
514 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
515 ret <2 x i64> %shuffle
; Mask <2, 0> (result <b[0], a[0]>), with %a and %b passed as the second and
; third parameters. %nonce is never read in the body; the expected assembly
; takes its inputs from xmm1/xmm2 and produces the result in xmm0.
517 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
518 ; SSE-LABEL: shuffle_v2i64_20_copy:
520 ; SSE-NEXT: movaps %xmm2, %xmm0
521 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
524 ; AVX-LABEL: shuffle_v2i64_20_copy:
526 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm1[0]
528 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
529 ret <2 x i64> %shuffle
; Mask <2, 1>: result is <b[0], a[1]>, a two-input blend in which each element
; stays in its own lane.
531 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
532 ; SSE2-LABEL: shuffle_v2i64_21:
534 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
537 ; SSE3-LABEL: shuffle_v2i64_21:
539 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
542 ; SSSE3-LABEL: shuffle_v2i64_21:
544 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
547 ; SSE41-LABEL: shuffle_v2i64_21:
549 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
552 ; AVX-LABEL: shuffle_v2i64_21:
554 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
556 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
557 ret <2 x i64> %shuffle
; Mask <2, 1> (result <b[0], a[1]>), with %a and %b passed as the second and
; third parameters. %nonce is never read in the body; the expected assembly
; takes its inputs from xmm1/xmm2 and produces the result in xmm0.
559 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
560 ; SSE2-LABEL: shuffle_v2i64_21_copy:
562 ; SSE2-NEXT: movapd %xmm1, %xmm0
563 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
566 ; SSE3-LABEL: shuffle_v2i64_21_copy:
568 ; SSE3-NEXT: movapd %xmm1, %xmm0
569 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
572 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
574 ; SSSE3-NEXT: movapd %xmm1, %xmm0
575 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
578 ; SSE41-LABEL: shuffle_v2i64_21_copy:
580 ; SSE41-NEXT: movaps %xmm1, %xmm0
581 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
584 ; AVX-LABEL: shuffle_v2i64_21_copy:
586 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
588 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
589 ret <2 x i64> %shuffle
; Mask <3, 0>: result is <b[1], a[0]>, the high element of %b followed by the
; low element of %a.
591 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
592 ; SSE2-LABEL: shuffle_v2i64_30:
594 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
595 ; SSE2-NEXT: movapd %xmm1, %xmm0
598 ; SSE3-LABEL: shuffle_v2i64_30:
600 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
601 ; SSE3-NEXT: movapd %xmm1, %xmm0
604 ; SSSE3-LABEL: shuffle_v2i64_30:
606 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
609 ; SSE41-LABEL: shuffle_v2i64_30:
611 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
614 ; AVX-LABEL: shuffle_v2i64_30:
616 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
618 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
619 ret <2 x i64> %shuffle
; Mask <3, 0> (result <b[1], a[0]>), with %a and %b passed as the second and
; third parameters. %nonce is never read in the body; the expected assembly
; takes its inputs from xmm1/xmm2 and produces the result in xmm0.
621 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
622 ; SSE2-LABEL: shuffle_v2i64_30_copy:
624 ; SSE2-NEXT: movapd %xmm2, %xmm0
625 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
628 ; SSE3-LABEL: shuffle_v2i64_30_copy:
630 ; SSE3-NEXT: movapd %xmm2, %xmm0
631 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
634 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
636 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
637 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
640 ; SSE41-LABEL: shuffle_v2i64_30_copy:
642 ; SSE41-NEXT: movdqa %xmm1, %xmm0
643 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
646 ; AVX-LABEL: shuffle_v2i64_30_copy:
648 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
650 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
651 ret <2 x i64> %shuffle
; Mask <3, 1>: result is <b[1], a[1]>, the high elements of both inputs with
; %b's element first.
653 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
654 ; SSE-LABEL: shuffle_v2i64_31:
656 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
657 ; SSE-NEXT: movaps %xmm1, %xmm0
660 ; AVX-LABEL: shuffle_v2i64_31:
662 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
664 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
665 ret <2 x i64> %shuffle
; Mask <3, 1> (result <b[1], a[1]>), with %a and %b passed as the second and
; third parameters. %nonce is never read in the body; the expected assembly
; takes its inputs from xmm1/xmm2 and produces the result in xmm0.
667 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
668 ; SSE-LABEL: shuffle_v2i64_31_copy:
670 ; SSE-NEXT: movaps %xmm2, %xmm0
671 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
674 ; AVX-LABEL: shuffle_v2i64_31_copy:
676 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm2[1],xmm1[1]
678 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
679 ret <2 x i64> %shuffle
; Shuffle against an all-zero second operand. Mask <0, 3>: result is <a[0], 0>.
682 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
683 ; SSE-LABEL: shuffle_v2i64_0z:
685 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
688 ; AVX-LABEL: shuffle_v2i64_0z:
690 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
692 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
693 ret <2 x i64> %shuffle
; Shuffle against an all-zero second operand. Mask <1, 3>: result is <a[1], 0>.
696 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
697 ; SSE-LABEL: shuffle_v2i64_1z:
699 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
702 ; AVX-LABEL: shuffle_v2i64_1z:
704 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
706 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
707 ret <2 x i64> %shuffle
; Shuffle against an all-zero second operand. Mask <2, 0>: result is <0, a[0]>.
710 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
711 ; SSE-LABEL: shuffle_v2i64_z0:
713 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
716 ; AVX-LABEL: shuffle_v2i64_z0:
718 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
720 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
721 ret <2 x i64> %shuffle
; Shuffle against an all-zero second operand. Mask <2, 1>: result is <0, a[1]>,
; i.e. the low element of %a is cleared and the high element is kept in place.
724 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
725 ; SSE2-LABEL: shuffle_v2i64_z1:
727 ; SSE2-NEXT: xorpd %xmm1, %xmm1
728 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
731 ; SSE3-LABEL: shuffle_v2i64_z1:
733 ; SSE3-NEXT: xorpd %xmm1, %xmm1
734 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
737 ; SSSE3-LABEL: shuffle_v2i64_z1:
739 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
740 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
743 ; SSE41-LABEL: shuffle_v2i64_z1:
745 ; SSE41-NEXT: xorps %xmm1, %xmm1
746 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
749 ; AVX-LABEL: shuffle_v2i64_z1:
751 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
752 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
754 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
755 ret <2 x i64> %shuffle
; Floating-point shuffle against an all-zero second operand. Mask <0, 3>:
; result is <a[0], 0.0>.
758 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
759 ; SSE-LABEL: shuffle_v2f64_0z:
761 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
764 ; AVX-LABEL: shuffle_v2f64_0z:
766 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
768 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
769 ret <2 x double> %shuffle
; Floating-point shuffle against an all-zero second operand. Mask <1, 3>:
; result is <a[1], 0.0>.
772 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
773 ; SSE-LABEL: shuffle_v2f64_1z:
775 ; SSE-NEXT: xorps %xmm1, %xmm1
776 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
779 ; AVX-LABEL: shuffle_v2f64_1z:
781 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
782 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
784 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
785 ret <2 x double> %shuffle
; Floating-point shuffle against an all-zero second operand. Mask <2, 0>:
; result is <0.0, a[0]>.
788 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
789 ; SSE-LABEL: shuffle_v2f64_z0:
791 ; SSE-NEXT: xorps %xmm1, %xmm1
792 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
793 ; SSE-NEXT: movaps %xmm1, %xmm0
796 ; AVX-LABEL: shuffle_v2f64_z0:
798 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
799 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
801 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
802 ret <2 x double> %shuffle
; Floating-point shuffle against an all-zero second operand. Mask <2, 1>:
; result is <0.0, a[1]>, i.e. the low element is cleared and the high one kept.
805 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
806 ; SSE2-LABEL: shuffle_v2f64_z1:
808 ; SSE2-NEXT: xorpd %xmm1, %xmm1
809 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
812 ; SSE3-LABEL: shuffle_v2f64_z1:
814 ; SSE3-NEXT: xorpd %xmm1, %xmm1
815 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
818 ; SSSE3-LABEL: shuffle_v2f64_z1:
820 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
821 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
824 ; SSE41-LABEL: shuffle_v2f64_z1:
826 ; SSE41-NEXT: xorps %xmm1, %xmm1
827 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
830 ; AVX-LABEL: shuffle_v2f64_z1:
832 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
833 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
835 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
836 ret <2 x double> %shuffle
; Two shuffles separated by bitcasts that together produce <a[1], 0.0>:
;   1. %shuffle64 (mask <2, 1> against zero) is <0.0, a[1]>.
;   2. Viewed as <4 x float>, mask <2, 3, 0, 1> swaps the two 64-bit halves.
; The expected assembly is a single zeroing plus one unpack, i.e. the pair of
; shuffles is expected to be folded across the bitcasts.
839 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
840 ; SSE-LABEL: shuffle_v2f64_bitcast_1z:
842 ; SSE-NEXT: xorps %xmm1, %xmm1
843 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
846 ; AVX-LABEL: shuffle_v2f64_bitcast_1z:
848 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
849 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
851 %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
852 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
853 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
854 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
855 ret <2 x double> %bitcast64
; Views %x as <4 x float>, replaces 32-bit element 0 with the constant 1.0
; (mask <4, 1, 2, 3>), casts back to <2 x i64>, then ANDs with
; <0xFFFFFFFF00000000, -1>. The AND clears exactly the 32 bits that the
; shuffle replaced, so %and equals %x with its lowest 32-bit element zeroed.
; NOTE(review): the ret for this function is not visible in this view;
; presumably it returns %and - confirm against the full file.
858 define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
859 ; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
861 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
864 ; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
866 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
869 ; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
871 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
874 ; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
876 ; SSE41-NEXT: xorps %xmm1, %xmm1
877 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
880 ; AVX-LABEL: shuffle_v2i64_bitcast_z123:
882 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
883 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
885 %bitcast32 = bitcast <2 x i64> %x to <4 x float>
886 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
887 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
888 %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
; Builds <%a, 0> from a scalar argument: %a is inserted into lane 0 of an undef
; vector, then mask <0, 3> pairs that lane with element 1 of a zero vector.
892 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
893 ; SSE-LABEL: insert_reg_and_zero_v2i64:
895 ; SSE-NEXT: movq %rdi, %xmm0
898 ; AVX-LABEL: insert_reg_and_zero_v2i64:
900 ; AVX-NEXT: vmovq %rdi, %xmm0
902 %v = insertelement <2 x i64> undef, i64 %a, i32 0
903 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
904 ret <2 x i64> %shuffle
; Builds <*%ptr, 0>: the i64 loaded from %ptr is inserted into lane 0 of an
; undef vector, then mask <0, 3> pairs it with element 1 of a zero vector.
907 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
908 ; SSE-LABEL: insert_mem_and_zero_v2i64:
910 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
913 ; AVX-LABEL: insert_mem_and_zero_v2i64:
915 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
917 %a = load i64, i64* %ptr
918 %v = insertelement <2 x i64> undef, i64 %a, i32 0
919 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
920 ret <2 x i64> %shuffle
; Builds <%a, 0.0> from a scalar double argument: %a is inserted into lane 0 of
; an undef vector, then mask <0, 3> pairs it with element 1 of a zero vector.
923 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
924 ; SSE-LABEL: insert_reg_and_zero_v2f64:
926 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
929 ; AVX-LABEL: insert_reg_and_zero_v2f64:
931 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
933 %v = insertelement <2 x double> undef, double %a, i32 0
934 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
935 ret <2 x double> %shuffle
; Builds <*%ptr, 0.0>: the double loaded from %ptr is inserted into lane 0 of
; an undef vector, then mask <0, 3> pairs it with element 1 of a zero vector.
938 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
939 ; SSE-LABEL: insert_mem_and_zero_v2f64:
941 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
944 ; AVX-LABEL: insert_mem_and_zero_v2f64:
946 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
948 %a = load double, double* %ptr
949 %v = insertelement <2 x double> undef, double %a, i32 0
950 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
951 ret <2 x double> %shuffle
; Builds <%a, b[1]>: scalar %a is placed in lane 0 (insertelement into undef)
; and mask <0, 3> keeps element 1 of %b, i.e. %a replaces the low element of %b.
954 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
955 ; SSE2-LABEL: insert_reg_lo_v2i64:
957 ; SSE2-NEXT: movq %rdi, %xmm1
958 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
961 ; SSE3-LABEL: insert_reg_lo_v2i64:
963 ; SSE3-NEXT: movq %rdi, %xmm1
964 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
967 ; SSSE3-LABEL: insert_reg_lo_v2i64:
969 ; SSSE3-NEXT: movq %rdi, %xmm1
970 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
973 ; SSE41-LABEL: insert_reg_lo_v2i64:
975 ; SSE41-NEXT: pinsrq $0, %rdi, %xmm0
978 ; AVX-LABEL: insert_reg_lo_v2i64:
980 ; AVX-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm0
982 %v = insertelement <2 x i64> undef, i64 %a, i32 0
983 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
984 ret <2 x i64> %shuffle
; Builds <*%ptr, b[1]>: the i64 loaded from %ptr is placed in lane 0 and mask
; <0, 3> keeps element 1 of %b, i.e. the load replaces the low element of %b.
987 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
988 ; SSE2-LABEL: insert_mem_lo_v2i64:
990 ; SSE2-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
993 ; SSE3-LABEL: insert_mem_lo_v2i64:
995 ; SSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
998 ; SSSE3-LABEL: insert_mem_lo_v2i64:
1000 ; SSSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1003 ; SSE41-LABEL: insert_mem_lo_v2i64:
1005 ; SSE41-NEXT: pinsrq $0, (%rdi), %xmm0
1008 ; AVX-LABEL: insert_mem_lo_v2i64:
1010 ; AVX-NEXT: vpinsrq $0, (%rdi), %xmm0, %xmm0
1012 %a = load i64, i64* %ptr
1013 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1014 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1015 ret <2 x i64> %shuffle
; Builds <b[0], %a>: mask <2, 0> puts element 0 of %b in lane 0 and the inserted
; scalar (lane 0 of %v) in lane 1, i.e. %a replaces the high element of %b.
1018 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
1019 ; SSE2-LABEL: insert_reg_hi_v2i64:
1021 ; SSE2-NEXT: movq %rdi, %xmm1
1022 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1025 ; SSE3-LABEL: insert_reg_hi_v2i64:
1027 ; SSE3-NEXT: movq %rdi, %xmm1
1028 ; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1031 ; SSSE3-LABEL: insert_reg_hi_v2i64:
1033 ; SSSE3-NEXT: movq %rdi, %xmm1
1034 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1037 ; SSE41-LABEL: insert_reg_hi_v2i64:
1039 ; SSE41-NEXT: pinsrq $1, %rdi, %xmm0
1042 ; AVX-LABEL: insert_reg_hi_v2i64:
1044 ; AVX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
1046 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1047 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1048 ret <2 x i64> %shuffle
; Builds <b[0], *%ptr>: mask <2, 0> puts element 0 of %b in lane 0 and the
; loaded i64 (lane 0 of %v) in lane 1, i.e. the load replaces the high element
; of %b.
1051 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
1052 ; SSE2-LABEL: insert_mem_hi_v2i64:
1054 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
1055 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1058 ; SSE3-LABEL: insert_mem_hi_v2i64:
1060 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
1061 ; SSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1064 ; SSSE3-LABEL: insert_mem_hi_v2i64:
1066 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
1067 ; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1070 ; SSE41-LABEL: insert_mem_hi_v2i64:
1072 ; SSE41-NEXT: pinsrq $1, (%rdi), %xmm0
1075 ; AVX-LABEL: insert_mem_hi_v2i64:
1077 ; AVX-NEXT: vpinsrq $1, (%rdi), %xmm0, %xmm0
1079 %a = load i64, i64* %ptr
1080 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1081 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1082 ret <2 x i64> %shuffle
; Builds <%a, b[1]>: scalar double %a is placed in lane 0 (insertelement into
; undef) and mask <0, 3> keeps element 1 of %b.
1085 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
1086 ; SSE2-LABEL: insert_reg_lo_v2f64:
1088 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1089 ; SSE2-NEXT: movapd %xmm1, %xmm0
1092 ; SSE3-LABEL: insert_reg_lo_v2f64:
1094 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1095 ; SSE3-NEXT: movapd %xmm1, %xmm0
1098 ; SSSE3-LABEL: insert_reg_lo_v2f64:
1100 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1101 ; SSSE3-NEXT: movapd %xmm1, %xmm0
1104 ; SSE41-LABEL: insert_reg_lo_v2f64:
1106 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1109 ; AVX-LABEL: insert_reg_lo_v2f64:
1111 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1113 %v = insertelement <2 x double> undef, double %a, i32 0
1114 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1115 ret <2 x double> %shuffle
; Builds <*%ptr, b[1]>: the double loaded from %ptr is placed in lane 0 and
; mask <0, 3> keeps element 1 of %b.
1118 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
1119 ; SSE-LABEL: insert_mem_lo_v2f64:
1121 ; SSE-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1124 ; AVX-LABEL: insert_mem_lo_v2f64:
1126 ; AVX-NEXT: vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1128 %a = load double, double* %ptr
1129 %v = insertelement <2 x double> undef, double %a, i32 0
1130 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1131 ret <2 x double> %shuffle
; Builds <b[0], %a>: mask <2, 0> puts element 0 of %b in lane 0 and the inserted
; scalar double (lane 0 of %v) in lane 1.
1134 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
1135 ; SSE-LABEL: insert_reg_hi_v2f64:
1137 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1138 ; SSE-NEXT: movaps %xmm1, %xmm0
1141 ; AVX-LABEL: insert_reg_hi_v2f64:
1143 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1145 %v = insertelement <2 x double> undef, double %a, i32 0
1146 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1147 ret <2 x double> %shuffle
; Builds <b[0], *%ptr>: mask <2, 0> puts element 0 of %b in lane 0 and the
; loaded double (lane 0 of %v) in lane 1.
1150 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
1151 ; SSE-LABEL: insert_mem_hi_v2f64:
1153 ; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1156 ; AVX-LABEL: insert_mem_hi_v2f64:
1158 ; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1160 %a = load double, double* %ptr
1161 %v = insertelement <2 x double> undef, double %a, i32 0
1162 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1163 ret <2 x double> %shuffle
; Builds <%a, %a>: the scalar double is inserted into lane 0 of an undef vector
; and mask <0, 0> broadcasts that lane to both result elements.
1166 define <2 x double> @insert_dup_reg_v2f64(double %a) {
1167 ; SSE2-LABEL: insert_dup_reg_v2f64:
1169 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1172 ; SSE3-LABEL: insert_dup_reg_v2f64:
1174 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1177 ; SSSE3-LABEL: insert_dup_reg_v2f64:
1179 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1182 ; SSE41-LABEL: insert_dup_reg_v2f64:
1184 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1187 ; AVX-LABEL: insert_dup_reg_v2f64:
1189 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1191 %v = insertelement <2 x double> undef, double %a, i32 0
1192 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1193 ret <2 x double> %shuffle
; Builds <*%ptr, *%ptr>: a scalar double is loaded from %ptr, inserted into
; lane 0 of an undef vector, and broadcast to both lanes with mask <0, 0>.
1196 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
1197 ; SSE2-LABEL: insert_dup_mem_v2f64:
1199 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1200 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1203 ; SSE3-LABEL: insert_dup_mem_v2f64:
1205 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1208 ; SSSE3-LABEL: insert_dup_mem_v2f64:
1210 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1213 ; SSE41-LABEL: insert_dup_mem_v2f64:
1215 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1218 ; AVX-LABEL: insert_dup_mem_v2f64:
1220 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1222 %a = load double, double* %ptr
1223 %v = insertelement <2 x double> undef, double %a, i32 0
1224 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1225 ret <2 x double> %shuffle
; Loads a whole <2 x double> from %ptr and broadcasts its element 0 to both
; lanes with mask <0, 0>; element 1 of the loaded vector is never selected.
1228 define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
1229 ; SSE2-LABEL: insert_dup_mem128_v2f64:
1231 ; SSE2-NEXT: movaps (%rdi), %xmm0
1232 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1235 ; SSE3-LABEL: insert_dup_mem128_v2f64:
1237 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1240 ; SSSE3-LABEL: insert_dup_mem128_v2f64:
1242 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1245 ; SSE41-LABEL: insert_dup_mem128_v2f64:
1247 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1250 ; AVX-LABEL: insert_dup_mem128_v2f64:
1252 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1254 %v = load <2 x double>, <2 x double>* %ptr
1255 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1256 ret <2 x double> %shuffle
; Loads an i64 from %ptr with "align 1", inserts it into lane 0 of an undef
; vector, and broadcasts it: the zeroinitializer mask is <0, 0>, so %tmp2 is
; <*%ptr, *%ptr>.
; NOTE(review): the ret for this function is not visible in this view;
; presumably it returns %tmp2 - confirm against the full file.
1260 define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
1261 ; SSE-LABEL: insert_dup_mem_v2i64:
1263 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1264 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1267 ; AVX-LABEL: insert_dup_mem_v2i64:
1269 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1271 %tmp = load i64, i64* %ptr, align 1
1272 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
1273 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
; Loads a <2 x double> from %ptr and swaps its two elements (mask <1, 0>); the
; second shuffle operand is undef and never selected.
1277 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
1278 ; SSE-LABEL: shuffle_mem_v2f64_10:
1280 ; SSE-NEXT: movapd (%rdi), %xmm0
1281 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1284 ; AVX-LABEL: shuffle_mem_v2f64_10:
1286 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1289 %a = load <2 x double>, <2 x double>* %ptr
1290 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1291 ret <2 x double> %shuffle
1294 define <2 x double> @shuffle_mem_v2f64_31(<2 x double> %a, <2 x double>* %b) {
1295 ; SSE-LABEL: shuffle_mem_v2f64_31:
1297 ; SSE-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1300 ; AVX-LABEL: shuffle_mem_v2f64_31:
1302 ; AVX-NEXT: vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1304 %c = load <2 x double>, <2 x double>* %b
1305 %f = shufflevector <2 x double> %a, <2 x double> %c, <2 x i32> <i32 3, i32 1>