1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 -O0 | FileCheck %s
4 define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <16 x i64> %arg3, <16 x i64> %arg4) {
6 ; CHECK: # %bb.0: # %bb
7 ; CHECK-NEXT: pushq %rbp
8 ; CHECK-NEXT: .cfi_def_cfa_offset 16
9 ; CHECK-NEXT: .cfi_offset %rbp, -16
10 ; CHECK-NEXT: movq %rsp, %rbp
11 ; CHECK-NEXT: .cfi_def_cfa_register %rbp
12 ; CHECK-NEXT: andq $-32, %rsp
13 ; CHECK-NEXT: subq $320, %rsp # imm = 0x140
14 ; CHECK-NEXT: vmovaps 240(%rbp), %ymm8
15 ; CHECK-NEXT: vmovaps 208(%rbp), %ymm9
16 ; CHECK-NEXT: vmovaps 176(%rbp), %ymm10
17 ; CHECK-NEXT: vmovaps 144(%rbp), %ymm11
18 ; CHECK-NEXT: vmovaps 112(%rbp), %ymm12
19 ; CHECK-NEXT: vmovaps 80(%rbp), %ymm13
20 ; CHECK-NEXT: vmovaps 48(%rbp), %ymm14
21 ; CHECK-NEXT: vmovaps 16(%rbp), %ymm15
22 ; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7]
23 ; CHECK-NEXT: vmovaps %xmm9, %xmm6
24 ; CHECK-NEXT: vmovdqa %xmm6, %xmm9
25 ; CHECK-NEXT: # kill: def $ymm9 killed $xmm9
26 ; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
27 ; CHECK-NEXT: # implicit-def: $ymm0
28 ; CHECK-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
29 ; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm2[8,9,10,11,12,13,14,15],ymm11[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm11[16,17,18,19,20,21,22,23]
30 ; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[2,3,2,0]
31 ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm11[0,1,2,3],ymm0[4,5],ymm11[6,7]
32 ; CHECK-NEXT: vmovaps %xmm2, %xmm6
33 ; CHECK-NEXT: # implicit-def: $ymm2
34 ; CHECK-NEXT: vinserti128 $1, %xmm6, %ymm2, %ymm2
35 ; CHECK-NEXT: vextracti128 $1, %ymm7, %xmm6
36 ; CHECK-NEXT: vmovq {{.*#+}} xmm6 = xmm6[0],zero
37 ; CHECK-NEXT: # implicit-def: $ymm11
38 ; CHECK-NEXT: vmovaps %xmm6, %xmm11
39 ; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm11[0,1,2,3],ymm2[4,5,6,7]
40 ; CHECK-NEXT: vmovaps %xmm7, %xmm6
41 ; CHECK-NEXT: vpslldq {{.*#+}} xmm6 = zero,zero,zero,zero,zero,zero,zero,zero,xmm6[0,1,2,3,4,5,6,7]
42 ; CHECK-NEXT: # implicit-def: $ymm11
43 ; CHECK-NEXT: vmovaps %xmm6, %xmm11
44 ; CHECK-NEXT: vpalignr {{.*#+}} ymm9 = ymm9[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm9[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
45 ; CHECK-NEXT: vpermq {{.*#+}} ymm9 = ymm9[0,1,0,3]
46 ; CHECK-NEXT: vpblendd {{.*#+}} ymm9 = ymm11[0,1,2,3],ymm9[4,5,6,7]
47 ; CHECK-NEXT: vpblendd {{.*#+}} ymm7 = ymm7[0,1],ymm8[2,3],ymm7[4,5,6,7]
48 ; CHECK-NEXT: vpermq {{.*#+}} ymm7 = ymm7[2,1,1,3]
49 ; CHECK-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5]
50 ; CHECK-NEXT: vpblendd {{.*#+}} ymm5 = ymm7[0,1,2,3,4,5],ymm5[6,7]
51 ; CHECK-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
52 ; CHECK-NEXT: vmovaps %ymm5, %ymm1
53 ; CHECK-NEXT: vmovaps %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
54 ; CHECK-NEXT: vmovaps %ymm9, %ymm3
55 ; CHECK-NEXT: vmovaps %ymm10, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
56 ; CHECK-NEXT: vmovaps %ymm12, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
57 ; CHECK-NEXT: vmovaps %ymm13, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
58 ; CHECK-NEXT: vmovaps %ymm14, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
59 ; CHECK-NEXT: vmovaps %ymm15, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
60 ; CHECK-NEXT: vmovaps %ymm4, (%rsp) # 32-byte Spill
61 ; CHECK-NEXT: movq %rbp, %rsp
62 ; CHECK-NEXT: popq %rbp
63 ; CHECK-NEXT: .cfi_def_cfa %rsp, 8
66 %tmp = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <16 x i64> %arg, <16 x i64> %arg1
67 %tmp5 = select <16 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i64> %arg2, <16 x i64> zeroinitializer
68 %tmp6 = select <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true>, <16 x i64> %arg3, <16 x i64> %tmp5
69 %tmp7 = shufflevector <16 x i64> %tmp, <16 x i64> %tmp6, <16 x i32> <i32 11, i32 18, i32 24, i32 9, i32 14, i32 29, i32 29, i32 6, i32 14, i32 28, i32 8, i32 9, i32 22, i32 12, i32 25, i32 6>