1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -O3 --x86-asm-syntax=intel -mtriple=x86_64 -mattr=avx < %s | FileCheck %s
; extracter0: takes element 0 of the [4 x <4 x i1>] argument and passes each of
; its four i1 lanes, in index order 0..3, to @print_i1 (four calls in total).
;
; The FileCheck assertions that follow the define line are autogenerated (see
; the NOTE on the first line of this file); regenerate them with the update
; script instead of editing them by hand. What the visible assertions expect:
;   * vpslld by 31 followed by vmovmskps gathers the low bit of each 32-bit
;     lane of xmm0 into edi;
;   * lanes 3, 2 and 1 are isolated with and/shr into bl, bpl and r14b, the
;     registers the .cfi_offset directives record as saved;
;   * the first call receives edi exactly as vmovmskps produced it, and the
;     remaining three calls receive r14b, bpl and bl zero-extended via movzx.
4 define void @extracter0([4 x <4 x i1>] %matrix) {
5 ; CHECK-LABEL: extracter0:
8 ; CHECK-NEXT: .cfi_def_cfa_offset 16
10 ; CHECK-NEXT: .cfi_def_cfa_offset 24
11 ; CHECK-NEXT: push rbx
12 ; CHECK-NEXT: .cfi_def_cfa_offset 32
13 ; CHECK-NEXT: .cfi_offset rbx, -32
14 ; CHECK-NEXT: .cfi_offset r14, -24
15 ; CHECK-NEXT: .cfi_offset rbp, -16
16 ; CHECK-NEXT: vpslld xmm0, xmm0, 31
17 ; CHECK-NEXT: vmovmskps edi, xmm0
18 ; CHECK-NEXT: mov ebx, edi
19 ; CHECK-NEXT: shr bl, 3
20 ; CHECK-NEXT: mov ebp, edi
21 ; CHECK-NEXT: and bpl, 4
22 ; CHECK-NEXT: shr bpl, 2
23 ; CHECK-NEXT: mov r14d, edi
24 ; CHECK-NEXT: and r14b, 2
25 ; CHECK-NEXT: shr r14b
26 ; CHECK-NEXT: call print_i1@PLT
27 ; CHECK-NEXT: movzx edi, r14b
28 ; CHECK-NEXT: call print_i1@PLT
29 ; CHECK-NEXT: movzx edi, bpl
30 ; CHECK-NEXT: call print_i1@PLT
31 ; CHECK-NEXT: movzx edi, bl
32 ; CHECK-NEXT: call print_i1@PLT
34 ; CHECK-NEXT: .cfi_def_cfa_offset 24
36 ; CHECK-NEXT: .cfi_def_cfa_offset 16
38 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; Pull out array element 0, then each of its four lanes; all extractions happen
; before the first call, and the calls follow in lane order.
40 %1 = extractvalue [4 x <4 x i1>] %matrix, 0
41 %2 = extractelement <4 x i1> %1, i64 0
42 %3 = extractelement <4 x i1> %1, i64 1
43 %4 = extractelement <4 x i1> %1, i64 2
44 %5 = extractelement <4 x i1> %1, i64 3
45 call void @print_i1(i1 %2)
46 call void @print_i1(i1 %3)
47 call void @print_i1(i1 %4)
48 call void @print_i1(i1 %5)
; extracter1: same pattern as a single-element extraction, but applied to array
; elements 0 and 1 of the [4 x <4 x i1>] argument. Each element's four i1 lanes
; are passed to @print_i1 in index order, element 0 first (eight calls total).
;
; The FileCheck assertions that follow the define line are autogenerated (see
; the NOTE on the first line of this file); regenerate them with the update
; script instead of editing them by hand. What the visible assertions expect:
;   * element 1 is read from xmm1: vpslld/vmovmskps put its lane bits in ebx,
;     lanes 2 and 1 are isolated into r14b and r15b, and lane 3 is stored to
;     the stack byte at [rsp + 7] and reloaded by the final movzx;
;   * element 0 is read from xmm0: its lane bits land in edi, with lanes 3, 2
;     and 1 copied into r12b, r13b and bpl;
;   * the first four calls consume element 0 (edi, bpl, r13b, r12b) and the
;     last four consume element 1 (ebx, r15b, r14b, the reloaded stack byte).
52 define void @extracter1([4 x <4 x i1>] %matrix) {
53 ; CHECK-LABEL: extracter1:
55 ; CHECK-NEXT: push rbp
56 ; CHECK-NEXT: .cfi_def_cfa_offset 16
57 ; CHECK-NEXT: push r15
58 ; CHECK-NEXT: .cfi_def_cfa_offset 24
59 ; CHECK-NEXT: push r14
60 ; CHECK-NEXT: .cfi_def_cfa_offset 32
61 ; CHECK-NEXT: push r13
62 ; CHECK-NEXT: .cfi_def_cfa_offset 40
63 ; CHECK-NEXT: push r12
64 ; CHECK-NEXT: .cfi_def_cfa_offset 48
65 ; CHECK-NEXT: push rbx
66 ; CHECK-NEXT: .cfi_def_cfa_offset 56
67 ; CHECK-NEXT: push rax
68 ; CHECK-NEXT: .cfi_def_cfa_offset 64
69 ; CHECK-NEXT: .cfi_offset rbx, -56
70 ; CHECK-NEXT: .cfi_offset r12, -48
71 ; CHECK-NEXT: .cfi_offset r13, -40
72 ; CHECK-NEXT: .cfi_offset r14, -32
73 ; CHECK-NEXT: .cfi_offset r15, -24
74 ; CHECK-NEXT: .cfi_offset rbp, -16
75 ; CHECK-NEXT: vpslld xmm1, xmm1, 31
76 ; CHECK-NEXT: vmovmskps ebx, xmm1
77 ; CHECK-NEXT: mov eax, ebx
78 ; CHECK-NEXT: shr al, 3
79 ; CHECK-NEXT: mov byte ptr [rsp + 7], al # 1-byte Spill
80 ; CHECK-NEXT: mov r14d, ebx
81 ; CHECK-NEXT: and r14b, 4
82 ; CHECK-NEXT: shr r14b, 2
83 ; CHECK-NEXT: mov r15d, ebx
84 ; CHECK-NEXT: and r15b, 2
85 ; CHECK-NEXT: shr r15b
86 ; CHECK-NEXT: vpslld xmm0, xmm0, 31
87 ; CHECK-NEXT: vmovmskps edi, xmm0
88 ; CHECK-NEXT: mov r12d, edi
89 ; CHECK-NEXT: shr r12b, 3
90 ; CHECK-NEXT: mov r13d, edi
91 ; CHECK-NEXT: and r13b, 4
92 ; CHECK-NEXT: shr r13b, 2
93 ; CHECK-NEXT: mov ebp, edi
94 ; CHECK-NEXT: and bpl, 2
96 ; CHECK-NEXT: call print_i1@PLT
97 ; CHECK-NEXT: movzx edi, bpl
98 ; CHECK-NEXT: call print_i1@PLT
99 ; CHECK-NEXT: movzx edi, r13b
100 ; CHECK-NEXT: call print_i1@PLT
101 ; CHECK-NEXT: movzx edi, r12b
102 ; CHECK-NEXT: call print_i1@PLT
103 ; CHECK-NEXT: mov edi, ebx
104 ; CHECK-NEXT: call print_i1@PLT
105 ; CHECK-NEXT: movzx edi, r15b
106 ; CHECK-NEXT: call print_i1@PLT
107 ; CHECK-NEXT: movzx edi, r14b
108 ; CHECK-NEXT: call print_i1@PLT
109 ; CHECK-NEXT: movzx edi, byte ptr [rsp + 7] # 1-byte Folded Reload
110 ; CHECK-NEXT: call print_i1@PLT
111 ; CHECK-NEXT: add rsp, 8
112 ; CHECK-NEXT: .cfi_def_cfa_offset 56
113 ; CHECK-NEXT: pop rbx
114 ; CHECK-NEXT: .cfi_def_cfa_offset 48
115 ; CHECK-NEXT: pop r12
116 ; CHECK-NEXT: .cfi_def_cfa_offset 40
117 ; CHECK-NEXT: pop r13
118 ; CHECK-NEXT: .cfi_def_cfa_offset 32
119 ; CHECK-NEXT: pop r14
120 ; CHECK-NEXT: .cfi_def_cfa_offset 24
121 ; CHECK-NEXT: pop r15
122 ; CHECK-NEXT: .cfi_def_cfa_offset 16
123 ; CHECK-NEXT: pop rbp
124 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; Array element 0: extract its four lanes, then print them in lane order.
126 %1 = extractvalue [4 x <4 x i1>] %matrix, 0
127 %2 = extractelement <4 x i1> %1, i64 0
128 %3 = extractelement <4 x i1> %1, i64 1
129 %4 = extractelement <4 x i1> %1, i64 2
130 %5 = extractelement <4 x i1> %1, i64 3
131 call void @print_i1(i1 %2)
132 call void @print_i1(i1 %3)
133 call void @print_i1(i1 %4)
134 call void @print_i1(i1 %5)
; Array element 1: extracted only after element 0's calls in the IR, although
; the expected machine code computes its lane bits before the first call.
135 %6 = extractvalue [4 x <4 x i1>] %matrix, 1
136 %7 = extractelement <4 x i1> %6, i64 0
137 %8 = extractelement <4 x i1> %6, i64 1
138 %9 = extractelement <4 x i1> %6, i64 2
139 %10 = extractelement <4 x i1> %6, i64 3
140 call void @print_i1(i1 %7)
141 call void @print_i1(i1 %8)
142 call void @print_i1(i1 %9)
143 call void @print_i1(i1 %10)
; External callee taking a single i1; it is only declared here, so the expected
; code reaches it through the PLT (call print_i1@PLT).
147 declare void @print_i1(i1)