1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL
; Mask <0,0> broadcasts element 0 of %a into both i64 lanes; %b is not
; referenced by the mask.
10 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
11 ; SSE-LABEL: shuffle_v2i64_00:
13 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
16 ; AVX1-LABEL: shuffle_v2i64_00:
18 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
21 ; AVX2-LABEL: shuffle_v2i64_00:
23 ; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
26 ; AVX512VL-LABEL: shuffle_v2i64_00:
28 ; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
30 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
31 ret <2 x i64> %shuffle
; Mask <1,0> swaps the two i64 lanes of %a; %b is not referenced by the mask.
33 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
34 ; SSE-LABEL: shuffle_v2i64_10:
36 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
39 ; AVX-LABEL: shuffle_v2i64_10:
41 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
43 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
44 ret <2 x i64> %shuffle
; Mask <1,1> broadcasts element 1 of %a into both i64 lanes; %b is not
; referenced by the mask.
46 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
47 ; SSE-LABEL: shuffle_v2i64_11:
49 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
52 ; AVX-LABEL: shuffle_v2i64_11:
54 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
56 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
57 ret <2 x i64> %shuffle
; Mask <2,2> broadcasts element 0 of %b (the second operand) into both i64
; lanes; %a is not referenced by the mask.
59 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
60 ; SSE-LABEL: shuffle_v2i64_22:
62 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
65 ; AVX1-LABEL: shuffle_v2i64_22:
67 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[0,1,0,1]
70 ; AVX2-LABEL: shuffle_v2i64_22:
72 ; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
75 ; AVX512VL-LABEL: shuffle_v2i64_22:
77 ; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
79 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
80 ret <2 x i64> %shuffle
; Mask <3,2> swaps the two i64 lanes of %b; %a is not referenced by the mask.
82 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
83 ; SSE-LABEL: shuffle_v2i64_32:
85 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
88 ; AVX-LABEL: shuffle_v2i64_32:
90 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[2,3,0,1]
92 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
93 ret <2 x i64> %shuffle
; Mask <3,3> broadcasts element 1 of %b into both i64 lanes; %a is not
; referenced by the mask.
95 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
96 ; SSE-LABEL: shuffle_v2i64_33:
98 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
101 ; AVX-LABEL: shuffle_v2i64_33:
103 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[2,3,2,3]
105 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
106 ret <2 x i64> %shuffle
; Mask <0,0> broadcasts element 0 of %a into both double lanes; %b is not
; referenced by the mask.
109 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
110 ; SSE2-LABEL: shuffle_v2f64_00:
112 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
115 ; SSE3-LABEL: shuffle_v2f64_00:
117 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
120 ; SSSE3-LABEL: shuffle_v2f64_00:
122 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
125 ; SSE41-LABEL: shuffle_v2f64_00:
127 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
130 ; AVX-LABEL: shuffle_v2f64_00:
132 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
134 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
135 ret <2 x double> %shuffle
; Mask <1,0> swaps the two double lanes of %a; %b is not referenced by the mask.
137 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
138 ; SSE-LABEL: shuffle_v2f64_10:
140 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
143 ; AVX-LABEL: shuffle_v2f64_10:
145 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
148 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
149 ret <2 x double> %shuffle
; Mask <1,1> broadcasts element 1 of %a into both double lanes; %b is not
; referenced by the mask.
151 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
152 ; SSE-LABEL: shuffle_v2f64_11:
154 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
157 ; AVX-LABEL: shuffle_v2f64_11:
159 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
161 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
162 ret <2 x double> %shuffle
; Mask <2,2> broadcasts element 0 of %b into both double lanes; %a is not
; referenced by the mask.
164 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
165 ; SSE2-LABEL: shuffle_v2f64_22:
167 ; SSE2-NEXT: movaps %xmm1, %xmm0
168 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
171 ; SSE3-LABEL: shuffle_v2f64_22:
173 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
176 ; SSSE3-LABEL: shuffle_v2f64_22:
178 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
181 ; SSE41-LABEL: shuffle_v2f64_22:
183 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
186 ; AVX-LABEL: shuffle_v2f64_22:
188 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
190 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
191 ret <2 x double> %shuffle
; Mask <3,2> swaps the two double lanes of %b; %a is not referenced by the mask.
193 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
194 ; SSE-LABEL: shuffle_v2f64_32:
196 ; SSE-NEXT: movaps %xmm1, %xmm0
197 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
200 ; AVX-LABEL: shuffle_v2f64_32:
202 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
205 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
206 ret <2 x double> %shuffle
; Mask <3,3> broadcasts element 1 of %b into both double lanes; %a is not
; referenced by the mask.
208 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
209 ; SSE-LABEL: shuffle_v2f64_33:
211 ; SSE-NEXT: movaps %xmm1, %xmm0
212 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
215 ; AVX-LABEL: shuffle_v2f64_33:
217 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,1]
219 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
220 ret <2 x double> %shuffle
; Mask <0,3> keeps lane 0 from %a and takes lane 1 from %b, i.e. a per-lane
; blend of the two inputs with no lane movement.
222 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
223 ; SSE2-LABEL: shuffle_v2f64_03:
225 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
228 ; SSE3-LABEL: shuffle_v2f64_03:
230 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
233 ; SSSE3-LABEL: shuffle_v2f64_03:
235 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
238 ; SSE41-LABEL: shuffle_v2f64_03:
240 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
243 ; AVX-LABEL: shuffle_v2f64_03:
245 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
247 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
248 ret <2 x double> %shuffle
; Mask <2,1> takes lane 0 from %b and keeps lane 1 from %a, i.e. a per-lane
; blend of the two inputs with no lane movement.
250 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
251 ; SSE2-LABEL: shuffle_v2f64_21:
253 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
256 ; SSE3-LABEL: shuffle_v2f64_21:
258 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
261 ; SSSE3-LABEL: shuffle_v2f64_21:
263 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
266 ; SSE41-LABEL: shuffle_v2f64_21:
268 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
271 ; AVX-LABEL: shuffle_v2f64_21:
273 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
275 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
276 ret <2 x double> %shuffle
; Mask <undef,2> places element 0 of %b in lane 1 and leaves lane 0
; unspecified, so the backend is free to pick any value for lane 0.
278 define <2 x double> @shuffle_v2f64_u2(<2 x double> %a, <2 x double> %b) {
279 ; SSE2-LABEL: shuffle_v2f64_u2:
281 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
284 ; SSE3-LABEL: shuffle_v2f64_u2:
286 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
289 ; SSSE3-LABEL: shuffle_v2f64_u2:
291 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
294 ; SSE41-LABEL: shuffle_v2f64_u2:
296 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
299 ; AVX-LABEL: shuffle_v2f64_u2:
301 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
303 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 undef, i32 2>
304 ret <2 x double> %shuffle
; Mask <3,undef> places element 1 of %b in lane 0 and leaves lane 1
; unspecified, so the backend is free to pick any value for lane 1.
306 define <2 x double> @shuffle_v2f64_3u(<2 x double> %a, <2 x double> %b) {
307 ; SSE-LABEL: shuffle_v2f64_3u:
309 ; SSE-NEXT: movaps %xmm1, %xmm0
310 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
313 ; AVX-LABEL: shuffle_v2f64_3u:
315 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
317 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 undef>
318 ret <2 x double> %shuffle
; Mask <0,2> pairs the low elements of both inputs; the result is
; { %a[0], %b[0] }.
321 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
322 ; SSE-LABEL: shuffle_v2i64_02:
324 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
327 ; AVX-LABEL: shuffle_v2i64_02:
329 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
331 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
332 ret <2 x i64> %shuffle
; Same <0,2> mask as shuffle_v2i64_02, but an unused leading %nonce argument
; precedes the shuffle operands, so the expected code reads them from
; xmm1/xmm2 rather than xmm0/xmm1.
; NOTE(review): presumably this exists to exercise the register-copy path when
; neither input already lives in the result register; confirm intent.
334 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
335 ; SSE-LABEL: shuffle_v2i64_02_copy:
337 ; SSE-NEXT: movaps %xmm1, %xmm0
338 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
341 ; AVX-LABEL: shuffle_v2i64_02_copy:
343 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm2[0]
345 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
346 ret <2 x i64> %shuffle
; Mask <0,3> keeps lane 0 from %a and takes lane 1 from %b; the result is
; { %a[0], %b[1] }.
348 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
349 ; SSE2-LABEL: shuffle_v2i64_03:
351 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
354 ; SSE3-LABEL: shuffle_v2i64_03:
356 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
359 ; SSSE3-LABEL: shuffle_v2i64_03:
361 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
364 ; SSE41-LABEL: shuffle_v2i64_03:
366 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
369 ; AVX-LABEL: shuffle_v2i64_03:
371 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
373 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
374 ret <2 x i64> %shuffle
; Same <0,3> mask as shuffle_v2i64_03, but an unused leading %nonce argument
; precedes the shuffle operands, so the expected code reads them from
; xmm1/xmm2 rather than xmm0/xmm1.
376 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
377 ; SSE2-LABEL: shuffle_v2i64_03_copy:
379 ; SSE2-NEXT: movaps %xmm1, %xmm0
380 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
383 ; SSE3-LABEL: shuffle_v2i64_03_copy:
385 ; SSE3-NEXT: movaps %xmm1, %xmm0
386 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
389 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
391 ; SSSE3-NEXT: movaps %xmm1, %xmm0
392 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
395 ; SSE41-LABEL: shuffle_v2i64_03_copy:
397 ; SSE41-NEXT: movaps %xmm1, %xmm0
398 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
401 ; AVX-LABEL: shuffle_v2i64_03_copy:
403 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
405 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
406 ret <2 x i64> %shuffle
; Mask <1,2> selects the high element of %a followed by the low element of
; %b; the result is { %a[1], %b[0] }.
408 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
409 ; SSE2-LABEL: shuffle_v2i64_12:
411 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
414 ; SSE3-LABEL: shuffle_v2i64_12:
416 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
419 ; SSSE3-LABEL: shuffle_v2i64_12:
421 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
422 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
425 ; SSE41-LABEL: shuffle_v2i64_12:
427 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
428 ; SSE41-NEXT: movdqa %xmm1, %xmm0
431 ; AVX-LABEL: shuffle_v2i64_12:
433 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
435 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
436 ret <2 x i64> %shuffle
; Same <1,2> mask as shuffle_v2i64_12, but an unused leading %nonce argument
; precedes the shuffle operands, so the expected code reads them from
; xmm1/xmm2 rather than xmm0/xmm1.
438 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
439 ; SSE2-LABEL: shuffle_v2i64_12_copy:
441 ; SSE2-NEXT: movaps %xmm1, %xmm0
442 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm2[0,1]
445 ; SSE3-LABEL: shuffle_v2i64_12_copy:
447 ; SSE3-NEXT: movaps %xmm1, %xmm0
448 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm2[0,1]
451 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
453 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
454 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
457 ; SSE41-LABEL: shuffle_v2i64_12_copy:
459 ; SSE41-NEXT: movdqa %xmm2, %xmm0
460 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
463 ; AVX-LABEL: shuffle_v2i64_12_copy:
465 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
467 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
468 ret <2 x i64> %shuffle
; Mask <1,3> pairs the high elements of both inputs; the result is
; { %a[1], %b[1] }.
470 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
471 ; SSE-LABEL: shuffle_v2i64_13:
473 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
476 ; AVX-LABEL: shuffle_v2i64_13:
478 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
480 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
481 ret <2 x i64> %shuffle
; Same <1,3> mask as shuffle_v2i64_13, but an unused leading %nonce argument
; precedes the shuffle operands, so the expected code reads them from
; xmm1/xmm2 rather than xmm0/xmm1.
483 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
484 ; SSE-LABEL: shuffle_v2i64_13_copy:
486 ; SSE-NEXT: movaps %xmm1, %xmm0
487 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
490 ; AVX-LABEL: shuffle_v2i64_13_copy:
492 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm2[1]
494 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
495 ret <2 x i64> %shuffle
; Mask <2,0> pairs the low elements of both inputs with %b first; the result
; is { %b[0], %a[0] }.
497 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
498 ; SSE-LABEL: shuffle_v2i64_20:
500 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
501 ; SSE-NEXT: movaps %xmm1, %xmm0
504 ; AVX-LABEL: shuffle_v2i64_20:
506 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
508 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
509 ret <2 x i64> %shuffle
; Same <2,0> mask as shuffle_v2i64_20, but an unused leading %nonce argument
; precedes the shuffle operands, so the expected code reads them from
; xmm1/xmm2 rather than xmm0/xmm1.
511 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
512 ; SSE-LABEL: shuffle_v2i64_20_copy:
514 ; SSE-NEXT: movaps %xmm2, %xmm0
515 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
518 ; AVX-LABEL: shuffle_v2i64_20_copy:
520 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm1[0]
522 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
523 ret <2 x i64> %shuffle
; Mask <2,1> takes lane 0 from %b and keeps lane 1 from %a; the result is
; { %b[0], %a[1] }.
525 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
526 ; SSE2-LABEL: shuffle_v2i64_21:
528 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
531 ; SSE3-LABEL: shuffle_v2i64_21:
533 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
536 ; SSSE3-LABEL: shuffle_v2i64_21:
538 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
541 ; SSE41-LABEL: shuffle_v2i64_21:
543 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
546 ; AVX-LABEL: shuffle_v2i64_21:
548 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
550 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
551 ret <2 x i64> %shuffle
; Same <2,1> mask as shuffle_v2i64_21, but an unused leading %nonce argument
; precedes the shuffle operands, so the expected code reads them from
; xmm1/xmm2 rather than xmm0/xmm1.
553 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
554 ; SSE2-LABEL: shuffle_v2i64_21_copy:
556 ; SSE2-NEXT: movapd %xmm1, %xmm0
557 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
560 ; SSE3-LABEL: shuffle_v2i64_21_copy:
562 ; SSE3-NEXT: movapd %xmm1, %xmm0
563 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
566 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
568 ; SSSE3-NEXT: movapd %xmm1, %xmm0
569 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
572 ; SSE41-LABEL: shuffle_v2i64_21_copy:
574 ; SSE41-NEXT: movaps %xmm1, %xmm0
575 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
578 ; AVX-LABEL: shuffle_v2i64_21_copy:
580 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
582 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
583 ret <2 x i64> %shuffle
; Mask <3,0> selects the high element of %b followed by the low element of
; %a; the result is { %b[1], %a[0] }.
585 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
586 ; SSE2-LABEL: shuffle_v2i64_30:
588 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
589 ; SSE2-NEXT: movaps %xmm1, %xmm0
592 ; SSE3-LABEL: shuffle_v2i64_30:
594 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
595 ; SSE3-NEXT: movaps %xmm1, %xmm0
598 ; SSSE3-LABEL: shuffle_v2i64_30:
600 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
603 ; SSE41-LABEL: shuffle_v2i64_30:
605 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
608 ; AVX-LABEL: shuffle_v2i64_30:
610 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
612 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
613 ret <2 x i64> %shuffle
; Same <3,0> mask as shuffle_v2i64_30, but an unused leading %nonce argument
; precedes the shuffle operands, so the expected code reads them from
; xmm1/xmm2 rather than xmm0/xmm1.
615 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
616 ; SSE2-LABEL: shuffle_v2i64_30_copy:
618 ; SSE2-NEXT: movaps %xmm2, %xmm0
619 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
622 ; SSE3-LABEL: shuffle_v2i64_30_copy:
624 ; SSE3-NEXT: movaps %xmm2, %xmm0
625 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
628 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
630 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
631 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
634 ; SSE41-LABEL: shuffle_v2i64_30_copy:
636 ; SSE41-NEXT: movdqa %xmm1, %xmm0
637 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
640 ; AVX-LABEL: shuffle_v2i64_30_copy:
642 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
644 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
645 ret <2 x i64> %shuffle
; Mask <3,1> pairs the high elements of both inputs with %b first; the result
; is { %b[1], %a[1] }.
647 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
648 ; SSE-LABEL: shuffle_v2i64_31:
650 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
651 ; SSE-NEXT: movaps %xmm1, %xmm0
654 ; AVX-LABEL: shuffle_v2i64_31:
656 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
658 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
659 ret <2 x i64> %shuffle
; Same <3,1> mask as shuffle_v2i64_31, but an unused leading %nonce argument
; precedes the shuffle operands, so the expected code reads them from
; xmm1/xmm2 rather than xmm0/xmm1.
661 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
662 ; SSE-LABEL: shuffle_v2i64_31_copy:
664 ; SSE-NEXT: movaps %xmm2, %xmm0
665 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
668 ; AVX-LABEL: shuffle_v2i64_31_copy:
670 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm2[1],xmm1[1]
672 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
673 ret <2 x i64> %shuffle
; Shuffle against zeroinitializer with mask <0,3>: keep %a[0] in lane 0 and
; zero lane 1, giving { %a[0], 0 }.
676 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
677 ; SSE-LABEL: shuffle_v2i64_0z:
679 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
682 ; AVX-LABEL: shuffle_v2i64_0z:
684 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
686 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
687 ret <2 x i64> %shuffle
; Shuffle against zeroinitializer with mask <1,3>: move %a[1] down to lane 0
; and zero lane 1, giving { %a[1], 0 }.
690 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
691 ; SSE-LABEL: shuffle_v2i64_1z:
693 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
696 ; AVX-LABEL: shuffle_v2i64_1z:
698 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
700 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
701 ret <2 x i64> %shuffle
; Shuffle against zeroinitializer with mask <2,0>: zero lane 0 and move %a[0]
; up to lane 1, giving { 0, %a[0] }.
704 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
705 ; SSE-LABEL: shuffle_v2i64_z0:
707 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
710 ; AVX-LABEL: shuffle_v2i64_z0:
712 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
714 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
715 ret <2 x i64> %shuffle
; Shuffle against zeroinitializer with mask <2,1>: zero lane 0 and keep %a[1]
; in lane 1, giving { 0, %a[1] }.
718 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
719 ; SSE2-LABEL: shuffle_v2i64_z1:
721 ; SSE2-NEXT: xorpd %xmm1, %xmm1
722 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
725 ; SSE3-LABEL: shuffle_v2i64_z1:
727 ; SSE3-NEXT: xorpd %xmm1, %xmm1
728 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
731 ; SSSE3-LABEL: shuffle_v2i64_z1:
733 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
734 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
737 ; SSE41-LABEL: shuffle_v2i64_z1:
739 ; SSE41-NEXT: xorps %xmm1, %xmm1
740 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
743 ; AVX-LABEL: shuffle_v2i64_z1:
745 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
746 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
748 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
749 ret <2 x i64> %shuffle
; Shuffle against zeroinitializer with mask <0,3>: keep %a[0] in lane 0 and
; zero lane 1, giving { %a[0], 0.0 }.
752 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
753 ; SSE-LABEL: shuffle_v2f64_0z:
755 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
758 ; AVX-LABEL: shuffle_v2f64_0z:
760 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
762 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
763 ret <2 x double> %shuffle
; Shuffle against zeroinitializer with mask <1,3>: move %a[1] down to lane 0
; and zero lane 1, giving { %a[1], 0.0 }.
766 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
767 ; SSE-LABEL: shuffle_v2f64_1z:
769 ; SSE-NEXT: xorps %xmm1, %xmm1
770 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
773 ; AVX-LABEL: shuffle_v2f64_1z:
775 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
776 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
778 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
779 ret <2 x double> %shuffle
; Shuffle against zeroinitializer with mask <2,0>: zero lane 0 and move %a[0]
; up to lane 1, giving { 0.0, %a[0] }.
782 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
783 ; SSE-LABEL: shuffle_v2f64_z0:
785 ; SSE-NEXT: xorps %xmm1, %xmm1
786 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
787 ; SSE-NEXT: movaps %xmm1, %xmm0
790 ; AVX-LABEL: shuffle_v2f64_z0:
792 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
793 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
795 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
796 ret <2 x double> %shuffle
; Shuffle against zeroinitializer with mask <2,1>: zero lane 0 and keep %a[1]
; in lane 1, giving { 0.0, %a[1] }.
799 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
800 ; SSE2-LABEL: shuffle_v2f64_z1:
802 ; SSE2-NEXT: xorpd %xmm1, %xmm1
803 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
806 ; SSE3-LABEL: shuffle_v2f64_z1:
808 ; SSE3-NEXT: xorpd %xmm1, %xmm1
809 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
812 ; SSSE3-LABEL: shuffle_v2f64_z1:
814 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
815 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
818 ; SSE41-LABEL: shuffle_v2f64_z1:
820 ; SSE41-NEXT: xorps %xmm1, %xmm1
821 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
824 ; AVX-LABEL: shuffle_v2f64_z1:
826 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
827 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
829 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
830 ret <2 x double> %shuffle
; Two shuffles separated by bitcasts that together form { %a[1], 0.0 }:
;   1. the 64-bit shuffle <2,1> against zero yields { 0.0, %a[1] };
;   2. viewed as <4 x float>, mask <2,3,0,1> swaps the two 64-bit halves.
; The expected code is a single zero + unpack-high sequence, i.e. the two
; shuffles are expected to be combined through the bitcasts.
833 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
834 ; SSE-LABEL: shuffle_v2f64_bitcast_1z:
836 ; SSE-NEXT: xorps %xmm1, %xmm1
837 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
840 ; AVX-LABEL: shuffle_v2f64_bitcast_1z:
842 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
843 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
845 %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
846 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
847 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
848 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
849 ret <2 x double> %bitcast64
; Views %x as <4 x float>, replaces 32-bit lane 0 with the constant 1.0 via
; mask <4,1,2,3>, then ANDs the i64 view with <0xFFFFFFFF00000000, -1>, which
; clears exactly that inserted lane. Net effect: 32-bit lane 0 of %x is zeroed
; and lanes 1-3 are preserved.
; NOTE(review): no `ret` instruction is visible after %and in this listing,
; unlike the neighbouring functions - confirm that `ret <2 x i64> %and`
; follows in the actual file.
852 define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
853 ; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
855 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
858 ; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
860 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
863 ; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
865 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
868 ; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
870 ; SSE41-NEXT: xorps %xmm1, %xmm1
871 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
874 ; AVX-LABEL: shuffle_v2i64_bitcast_z123:
876 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
877 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
879 %bitcast32 = bitcast <2 x i64> %x to <4 x float>
880 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
881 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
882 %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
; Inserts the scalar argument %a into lane 0 of an undef vector, then shuffles
; <0,3> against zero, giving { %a, 0 }.
886 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
887 ; SSE-LABEL: insert_reg_and_zero_v2i64:
889 ; SSE-NEXT: movq %rdi, %xmm0
892 ; AVX-LABEL: insert_reg_and_zero_v2i64:
894 ; AVX-NEXT: vmovq %rdi, %xmm0
896 %v = insertelement <2 x i64> undef, i64 %a, i32 0
897 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
898 ret <2 x i64> %shuffle
; Loads an i64 from %ptr, inserts it into lane 0 of an undef vector, then
; shuffles <0,3> against zero, giving { *%ptr, 0 }.
901 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
902 ; SSE-LABEL: insert_mem_and_zero_v2i64:
904 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
907 ; AVX-LABEL: insert_mem_and_zero_v2i64:
909 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
911 %a = load i64, i64* %ptr
912 %v = insertelement <2 x i64> undef, i64 %a, i32 0
913 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
914 ret <2 x i64> %shuffle
; Inserts the scalar argument %a into lane 0 of an undef vector, then shuffles
; <0,3> against zero, giving { %a, 0.0 }.
917 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
918 ; SSE-LABEL: insert_reg_and_zero_v2f64:
920 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
923 ; AVX-LABEL: insert_reg_and_zero_v2f64:
925 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
927 %v = insertelement <2 x double> undef, double %a, i32 0
928 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
929 ret <2 x double> %shuffle
; Loads a double from %ptr, inserts it into lane 0 of an undef vector, then
; shuffles <0,3> against zero, giving { *%ptr, 0.0 }.
932 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
933 ; SSE-LABEL: insert_mem_and_zero_v2f64:
935 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
938 ; AVX-LABEL: insert_mem_and_zero_v2f64:
940 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
942 %a = load double, double* %ptr
943 %v = insertelement <2 x double> undef, double %a, i32 0
944 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
945 ret <2 x double> %shuffle
; Replaces the low lane of %b with the scalar argument %a: the scalar is
; inserted into lane 0 of an undef vector and mask <0,3> then takes that lane
; plus %b[1], giving { %a, %b[1] }.
948 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
949 ; SSE2-LABEL: insert_reg_lo_v2i64:
951 ; SSE2-NEXT: movq %rdi, %xmm1
952 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
955 ; SSE3-LABEL: insert_reg_lo_v2i64:
957 ; SSE3-NEXT: movq %rdi, %xmm1
958 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
961 ; SSSE3-LABEL: insert_reg_lo_v2i64:
963 ; SSSE3-NEXT: movq %rdi, %xmm1
964 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
967 ; SSE41-LABEL: insert_reg_lo_v2i64:
969 ; SSE41-NEXT: pinsrq $0, %rdi, %xmm0
972 ; AVX-LABEL: insert_reg_lo_v2i64:
974 ; AVX-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm0
976 %v = insertelement <2 x i64> undef, i64 %a, i32 0
977 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
978 ret <2 x i64> %shuffle
; Replaces the low lane of %b with an i64 loaded from %ptr: the loaded scalar
; is inserted into lane 0 of an undef vector and mask <0,3> then takes that
; lane plus %b[1], giving { *%ptr, %b[1] }.
981 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
982 ; SSE2-LABEL: insert_mem_lo_v2i64:
984 ; SSE2-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
987 ; SSE3-LABEL: insert_mem_lo_v2i64:
989 ; SSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
992 ; SSSE3-LABEL: insert_mem_lo_v2i64:
994 ; SSSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
997 ; SSE41-LABEL: insert_mem_lo_v2i64:
999 ; SSE41-NEXT: pinsrq $0, (%rdi), %xmm0
1002 ; AVX-LABEL: insert_mem_lo_v2i64:
1004 ; AVX-NEXT: vpinsrq $0, (%rdi), %xmm0, %xmm0
1006 %a = load i64, i64* %ptr
1007 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1008 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1009 ret <2 x i64> %shuffle
; Replaces the high lane of %b with the scalar argument %a: the scalar is
; inserted into lane 0 of an undef vector %v and mask <2,0> then takes %b[0]
; followed by %v[0], giving { %b[0], %a }.
1012 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
1013 ; SSE2-LABEL: insert_reg_hi_v2i64:
1015 ; SSE2-NEXT: movq %rdi, %xmm1
1016 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1019 ; SSE3-LABEL: insert_reg_hi_v2i64:
1021 ; SSE3-NEXT: movq %rdi, %xmm1
1022 ; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1025 ; SSSE3-LABEL: insert_reg_hi_v2i64:
1027 ; SSSE3-NEXT: movq %rdi, %xmm1
1028 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1031 ; SSE41-LABEL: insert_reg_hi_v2i64:
1033 ; SSE41-NEXT: pinsrq $1, %rdi, %xmm0
1036 ; AVX-LABEL: insert_reg_hi_v2i64:
1038 ; AVX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
1040 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1041 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1042 ret <2 x i64> %shuffle
; Replaces the high lane of %b with an i64 loaded from %ptr: the loaded scalar
; is inserted into lane 0 of an undef vector %v and mask <2,0> then takes
; %b[0] followed by %v[0], giving { %b[0], *%ptr }.
1045 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
1046 ; SSE2-LABEL: insert_mem_hi_v2i64:
1048 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
1049 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1052 ; SSE3-LABEL: insert_mem_hi_v2i64:
1054 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
1055 ; SSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1058 ; SSSE3-LABEL: insert_mem_hi_v2i64:
1060 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
1061 ; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1064 ; SSE41-LABEL: insert_mem_hi_v2i64:
1066 ; SSE41-NEXT: pinsrq $1, (%rdi), %xmm0
1069 ; AVX-LABEL: insert_mem_hi_v2i64:
1071 ; AVX-NEXT: vpinsrq $1, (%rdi), %xmm0, %xmm0
1073 %a = load i64, i64* %ptr
1074 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1075 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1076 ret <2 x i64> %shuffle
; Replaces the low lane of %b with the scalar argument %a: the scalar is
; inserted into lane 0 of an undef vector and mask <0,3> then takes that lane
; plus %b[1], giving { %a, %b[1] }.
1079 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
1080 ; SSE2-LABEL: insert_reg_lo_v2f64:
1082 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1085 ; SSE3-LABEL: insert_reg_lo_v2f64:
1087 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1090 ; SSSE3-LABEL: insert_reg_lo_v2f64:
1092 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1095 ; SSE41-LABEL: insert_reg_lo_v2f64:
1097 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1100 ; AVX-LABEL: insert_reg_lo_v2f64:
1102 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1104 %v = insertelement <2 x double> undef, double %a, i32 0
1105 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1106 ret <2 x double> %shuffle
; Replaces the low lane of %b with a double loaded from %ptr: the loaded
; scalar is inserted into lane 0 of an undef vector and mask <0,3> then takes
; that lane plus %b[1], giving { *%ptr, %b[1] }.
1109 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
1110 ; SSE-LABEL: insert_mem_lo_v2f64:
1112 ; SSE-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1115 ; AVX-LABEL: insert_mem_lo_v2f64:
1117 ; AVX-NEXT: vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1119 %a = load double, double* %ptr
1120 %v = insertelement <2 x double> undef, double %a, i32 0
1121 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1122 ret <2 x double> %shuffle
; Replaces the high lane of %b with the scalar argument %a: the scalar is
; inserted into lane 0 of an undef vector %v and mask <2,0> then takes %b[0]
; followed by %v[0], giving { %b[0], %a }.
1125 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
1126 ; SSE-LABEL: insert_reg_hi_v2f64:
1128 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1129 ; SSE-NEXT: movaps %xmm1, %xmm0
1132 ; AVX-LABEL: insert_reg_hi_v2f64:
1134 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1136 %v = insertelement <2 x double> undef, double %a, i32 0
1137 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1138 ret <2 x double> %shuffle
; Replaces the high lane of %b with a double loaded from %ptr: the loaded
; scalar is inserted into lane 0 of an undef vector %v and mask <2,0> then
; takes %b[0] followed by %v[0], giving { %b[0], *%ptr }.
1141 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
1142 ; SSE-LABEL: insert_mem_hi_v2f64:
1144 ; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
1147 ; AVX-LABEL: insert_mem_hi_v2f64:
1149 ; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
1151 %a = load double, double* %ptr
1152 %v = insertelement <2 x double> undef, double %a, i32 0
1153 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1154 ret <2 x double> %shuffle
; Inserts the scalar argument %a into lane 0 of an undef vector and then
; broadcasts that lane with mask <0,0>, giving { %a, %a }.
1157 define <2 x double> @insert_dup_reg_v2f64(double %a) {
1158 ; SSE2-LABEL: insert_dup_reg_v2f64:
1160 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1163 ; SSE3-LABEL: insert_dup_reg_v2f64:
1165 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1168 ; SSSE3-LABEL: insert_dup_reg_v2f64:
1170 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1173 ; SSE41-LABEL: insert_dup_reg_v2f64:
1175 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1178 ; AVX-LABEL: insert_dup_reg_v2f64:
1180 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1182 %v = insertelement <2 x double> undef, double %a, i32 0
1183 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1184 ret <2 x double> %shuffle
; Splat a scalar double loaded from %ptr across both lanes of a <2 x double>.
; The loaded value is inserted into lane 0 of an undef vector and shuffle
; mask <0, 0> duplicates that lane.
; The visible checks expect a movsd load followed by movlhps on plain SSE2,
; and a movddup / vmovddup with a memory operand on SSE3, SSSE3, SSE4.1 and
; AVX targets.
1187 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
1188 ; SSE2-LABEL: insert_dup_mem_v2f64:
1190 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1191 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1194 ; SSE3-LABEL: insert_dup_mem_v2f64:
1196 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1199 ; SSSE3-LABEL: insert_dup_mem_v2f64:
1201 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1204 ; SSE41-LABEL: insert_dup_mem_v2f64:
1206 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1209 ; AVX-LABEL: insert_dup_mem_v2f64:
1211 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1213 %a = load double, double* %ptr
1214 %v = insertelement <2 x double> undef, double %a, i32 0
1215 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1216 ret <2 x double> %shuffle
; Splat lane 0 of a full <2 x double> vector loaded from %ptr.
; Unlike the scalar-load variant, the IR loads all 128 bits and shuffle mask
; <0, 0> then duplicates the low element.
; The visible checks expect a full movaps load followed by movlhps on plain
; SSE2, and a movddup / vmovddup with a memory operand on SSE3, SSSE3, SSE4.1
; and AVX targets.
1219 define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
1220 ; SSE2-LABEL: insert_dup_mem128_v2f64:
1222 ; SSE2-NEXT: movaps (%rdi), %xmm0
1223 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1226 ; SSE3-LABEL: insert_dup_mem128_v2f64:
1228 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1231 ; SSSE3-LABEL: insert_dup_mem128_v2f64:
1233 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1236 ; SSE41-LABEL: insert_dup_mem128_v2f64:
1238 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1241 ; AVX-LABEL: insert_dup_mem128_v2f64:
1243 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1245 %v = load <2 x double>, <2 x double>* %ptr
1246 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1247 ret <2 x double> %shuffle
; Integer variant of the scalar-load splat: an i64 is loaded from %ptr with
; alignment 1, inserted into lane 0 of an undef <2 x i64>, and a
; zeroinitializer shuffle mask selects lane 0 for both result lanes (%tmp2).
; The visible checks expect a movq load followed by pshufd on SSE targets and
; a vmovddup with a memory operand on AVX targets.
1251 define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
1252 ; SSE-LABEL: insert_dup_mem_v2i64:
1254 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1255 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1258 ; AVX-LABEL: insert_dup_mem_v2i64:
1260 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1262 %tmp = load i64, i64* %ptr, align 1
1263 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
1264 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
; Swap the two lanes of a <2 x double> loaded from %ptr (shuffle mask <1, 0>,
; second operand undef).
; The visible checks expect a movaps load followed by shufps on SSE targets,
; and a single vpermilpd with a memory operand on AVX targets.
1268 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
1269 ; SSE-LABEL: shuffle_mem_v2f64_10:
1271 ; SSE-NEXT: movaps (%rdi), %xmm0
1272 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
1275 ; AVX-LABEL: shuffle_mem_v2f64_10:
1277 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1280 %a = load <2 x double>, <2 x double>* %ptr
1281 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1282 ret <2 x double> %shuffle
; Two-input shuffle where the second operand %c is loaded from memory (%b).
; Shuffle mask <3, 1> selects lane 1 of %c followed by lane 1 of %a, i.e. %f
; is [c[1], a[1]].
; The visible checks expect movlps (SSE targets) / vmovlps (AVX targets)
; writing the low lane of xmm0 from memory and keeping its high lane.
1285 define <2 x double> @shuffle_mem_v2f64_31(<2 x double> %a, <2 x double>* %b) {
1286 ; SSE-LABEL: shuffle_mem_v2f64_31:
1288 ; SSE-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1291 ; AVX-LABEL: shuffle_mem_v2f64_31:
1293 ; AVX-NEXT: vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1295 %c = load <2 x double>, <2 x double>* %b
1296 %f = shufflevector <2 x double> %a, <2 x double> %c, <2 x i32> <i32 3, i32 1>
; Two-input shuffle with the second operand loaded from %pb at alignment 1.
; Shuffle mask <0, 2> selects lane 0 of %a followed by lane 0 of %b, i.e. the
; result is [a[0], b[0]].
; The visible checks expect movhps from memory on SSE targets and vunpcklpd
; with a memory operand on AVX targets.
1300 define <2 x double> @shuffle_mem_v2f64_02(<2 x double> %a, <2 x double>* %pb) {
1301 ; SSE-LABEL: shuffle_mem_v2f64_02:
1303 ; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
1306 ; AVX-LABEL: shuffle_mem_v2f64_02:
1308 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1310 %b = load <2 x double>, <2 x double>* %pb, align 1
1311 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
1312 ret <2 x double> %shuffle
; Two-input shuffle with the second operand loaded from %pb at alignment 1.
; Shuffle mask <2, 1> selects lane 0 of %b followed by lane 1 of %a, i.e. the
; result is [b[0], a[1]].
; The visible checks expect movlps from memory on SSE2, SSE3 and SSSE3; an
; explicit movups load followed by blendps on SSE4.1; and vblendps with a
; memory operand on AVX targets.
1315 define <2 x double> @shuffle_mem_v2f64_21(<2 x double> %a, <2 x double>* %pb) {
1316 ; SSE2-LABEL: shuffle_mem_v2f64_21:
1318 ; SSE2-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1321 ; SSE3-LABEL: shuffle_mem_v2f64_21:
1323 ; SSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1326 ; SSSE3-LABEL: shuffle_mem_v2f64_21:
1328 ; SSSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1331 ; SSE41-LABEL: shuffle_mem_v2f64_21:
1333 ; SSE41-NEXT: movups (%rdi), %xmm1
1334 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1337 ; AVX-LABEL: shuffle_mem_v2f64_21:
1339 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1341 %b = load <2 x double>, <2 x double>* %pb, align 1
1342 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
1343 ret <2 x double> %shuffle