1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
3 ; RUN: llc < %s -mtriple=aarch64-eabi -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s
; Population count of a 32-bit integer argument via llvm.ctpop.i32.
; The default run expects the value to be copied through w8 into d0, counted
; per byte with cnt.8b and summed with uaddlv.8b. The run with NEON disabled
; expects a scalar shift/mask/add sequence finished by a multiply and a
; right shift by 24.
5 define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
6 ; CHECK-LABEL: cnt32_advsimd:
8 ; CHECK-NEXT: mov w8, w0
9 ; CHECK-NEXT: fmov d0, x8
10 ; CHECK-NEXT: cnt.8b v0, v0
11 ; CHECK-NEXT: uaddlv.8b h0, v0
12 ; CHECK-NEXT: fmov w0, s0
15 ; CHECK-NONEON-LABEL: cnt32_advsimd:
16 ; CHECK-NONEON: // %bb.0:
17 ; CHECK-NONEON-NEXT: lsr w8, w0, #1
18 ; CHECK-NONEON-NEXT: and w8, w8, #0x55555555
19 ; CHECK-NONEON-NEXT: sub w8, w0, w8
20 ; CHECK-NONEON-NEXT: and w9, w8, #0x33333333
21 ; CHECK-NONEON-NEXT: lsr w8, w8, #2
22 ; CHECK-NONEON-NEXT: and w8, w8, #0x33333333
23 ; CHECK-NONEON-NEXT: add w8, w9, w8
24 ; CHECK-NONEON-NEXT: add w8, w8, w8, lsr #4
25 ; CHECK-NONEON-NEXT: and w8, w8, #0xf0f0f0f
26 ; CHECK-NONEON-NEXT: mov w9, #16843009
27 ; CHECK-NONEON-NEXT: mul w8, w8, w9
28 ; CHECK-NONEON-NEXT: lsr w0, w8, #24
29 ; CHECK-NONEON-NEXT: ret
30 %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
; Population count of element 0 extracted from a <2 x i32> vector argument.
; The default run expects the lane to be moved out to w0 and back into d0
; before the cnt.8b / uaddlv.8b sequence. The run with NEON disabled expects
; the same scalar 32-bit sequence as for a plain i32 argument, starting from
; w0.
34 define i32 @cnt32_advsimd_2(<2 x i32> %x) {
35 ; CHECK-LABEL: cnt32_advsimd_2:
37 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
38 ; CHECK-NEXT: fmov w0, s0
39 ; CHECK-NEXT: fmov d0, x0
40 ; CHECK-NEXT: cnt.8b v0, v0
41 ; CHECK-NEXT: uaddlv.8b h0, v0
42 ; CHECK-NEXT: fmov w0, s0
45 ; CHECK-NONEON-LABEL: cnt32_advsimd_2:
46 ; CHECK-NONEON: // %bb.0:
47 ; CHECK-NONEON-NEXT: lsr w8, w0, #1
48 ; CHECK-NONEON-NEXT: and w8, w8, #0x55555555
49 ; CHECK-NONEON-NEXT: sub w8, w0, w8
50 ; CHECK-NONEON-NEXT: and w9, w8, #0x33333333
51 ; CHECK-NONEON-NEXT: lsr w8, w8, #2
52 ; CHECK-NONEON-NEXT: and w8, w8, #0x33333333
53 ; CHECK-NONEON-NEXT: add w8, w9, w8
54 ; CHECK-NONEON-NEXT: add w8, w8, w8, lsr #4
55 ; CHECK-NONEON-NEXT: and w8, w8, #0xf0f0f0f
56 ; CHECK-NONEON-NEXT: mov w9, #16843009
57 ; CHECK-NONEON-NEXT: mul w8, w8, w9
58 ; CHECK-NONEON-NEXT: lsr w0, w8, #24
59 ; CHECK-NONEON-NEXT: ret
60 %1 = extractelement <2 x i32> %x, i64 0
61 %2 = tail call i32 @llvm.ctpop.i32(i32 %1)
; Population count of a 64-bit integer argument via llvm.ctpop.i64.
; The default run expects x0 to be moved straight into d0, counted per byte
; with cnt.8b and summed with uaddlv.8b. The run with NEON disabled expects
; the 64-bit scalar shift/mask/add sequence finished by a multiply and a
; right shift by 56.
65 define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
66 ; CHECK-LABEL: cnt64_advsimd:
68 ; CHECK-NEXT: fmov d0, x0
69 ; CHECK-NEXT: cnt.8b v0, v0
70 ; CHECK-NEXT: uaddlv.8b h0, v0
71 ; CHECK-NEXT: fmov w0, s0
74 ; CHECK-NONEON-LABEL: cnt64_advsimd:
75 ; CHECK-NONEON: // %bb.0:
76 ; CHECK-NONEON-NEXT: lsr x8, x0, #1
77 ; CHECK-NONEON-NEXT: and x8, x8, #0x5555555555555555
78 ; CHECK-NONEON-NEXT: sub x8, x0, x8
79 ; CHECK-NONEON-NEXT: and x9, x8, #0x3333333333333333
80 ; CHECK-NONEON-NEXT: lsr x8, x8, #2
81 ; CHECK-NONEON-NEXT: and x8, x8, #0x3333333333333333
82 ; CHECK-NONEON-NEXT: add x8, x9, x8
83 ; CHECK-NONEON-NEXT: add x8, x8, x8, lsr #4
84 ; CHECK-NONEON-NEXT: and x8, x8, #0xf0f0f0f0f0f0f0f
85 ; CHECK-NONEON-NEXT: mov x9, #72340172838076673
86 ; CHECK-NONEON-NEXT: mul x8, x8, x9
87 ; CHECK-NONEON-NEXT: lsr x0, x8, #56
88 ; CHECK-NONEON-NEXT: ret
89 %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
93 ; Do not use AdvSIMD when -mno-implicit-float is specified.
; 32-bit population count in a function marked noimplicitfloat.
; Both runs expect the same scalar shift/mask/add/multiply sequence; no
; vector instructions appear in the expected output of either run.
96 define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
99 ; CHECK-NEXT: lsr w8, w0, #1
100 ; CHECK-NEXT: and w8, w8, #0x55555555
101 ; CHECK-NEXT: sub w8, w0, w8
102 ; CHECK-NEXT: and w9, w8, #0x33333333
103 ; CHECK-NEXT: lsr w8, w8, #2
104 ; CHECK-NEXT: and w8, w8, #0x33333333
105 ; CHECK-NEXT: add w8, w9, w8
106 ; CHECK-NEXT: add w8, w8, w8, lsr #4
107 ; CHECK-NEXT: and w8, w8, #0xf0f0f0f
108 ; CHECK-NEXT: mov w9, #16843009
109 ; CHECK-NEXT: mul w8, w8, w9
110 ; CHECK-NEXT: lsr w0, w8, #24
113 ; CHECK-NONEON-LABEL: cnt32:
114 ; CHECK-NONEON: // %bb.0:
115 ; CHECK-NONEON-NEXT: lsr w8, w0, #1
116 ; CHECK-NONEON-NEXT: and w8, w8, #0x55555555
117 ; CHECK-NONEON-NEXT: sub w8, w0, w8
118 ; CHECK-NONEON-NEXT: and w9, w8, #0x33333333
119 ; CHECK-NONEON-NEXT: lsr w8, w8, #2
120 ; CHECK-NONEON-NEXT: and w8, w8, #0x33333333
121 ; CHECK-NONEON-NEXT: add w8, w9, w8
122 ; CHECK-NONEON-NEXT: add w8, w8, w8, lsr #4
123 ; CHECK-NONEON-NEXT: and w8, w8, #0xf0f0f0f
124 ; CHECK-NONEON-NEXT: mov w9, #16843009
125 ; CHECK-NONEON-NEXT: mul w8, w8, w9
126 ; CHECK-NONEON-NEXT: lsr w0, w8, #24
127 ; CHECK-NONEON-NEXT: ret
128 %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
; 64-bit population count in a function marked noimplicitfloat.
; Both runs expect the same 64-bit scalar shift/mask/add/multiply sequence
; ending in a right shift by 56; no vector instructions appear in the
; expected output of either run.
132 define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
133 ; CHECK-LABEL: cnt64:
135 ; CHECK-NEXT: lsr x8, x0, #1
136 ; CHECK-NEXT: and x8, x8, #0x5555555555555555
137 ; CHECK-NEXT: sub x8, x0, x8
138 ; CHECK-NEXT: and x9, x8, #0x3333333333333333
139 ; CHECK-NEXT: lsr x8, x8, #2
140 ; CHECK-NEXT: and x8, x8, #0x3333333333333333
141 ; CHECK-NEXT: add x8, x9, x8
142 ; CHECK-NEXT: add x8, x8, x8, lsr #4
143 ; CHECK-NEXT: and x8, x8, #0xf0f0f0f0f0f0f0f
144 ; CHECK-NEXT: mov x9, #72340172838076673
145 ; CHECK-NEXT: mul x8, x8, x9
146 ; CHECK-NEXT: lsr x0, x8, #56
149 ; CHECK-NONEON-LABEL: cnt64:
150 ; CHECK-NONEON: // %bb.0:
151 ; CHECK-NONEON-NEXT: lsr x8, x0, #1
152 ; CHECK-NONEON-NEXT: and x8, x8, #0x5555555555555555
153 ; CHECK-NONEON-NEXT: sub x8, x0, x8
154 ; CHECK-NONEON-NEXT: and x9, x8, #0x3333333333333333
155 ; CHECK-NONEON-NEXT: lsr x8, x8, #2
156 ; CHECK-NONEON-NEXT: and x8, x8, #0x3333333333333333
157 ; CHECK-NONEON-NEXT: add x8, x9, x8
158 ; CHECK-NONEON-NEXT: add x8, x8, x8, lsr #4
159 ; CHECK-NONEON-NEXT: and x8, x8, #0xf0f0f0f0f0f0f0f
160 ; CHECK-NONEON-NEXT: mov x9, #72340172838076673
161 ; CHECK-NONEON-NEXT: mul x8, x8, x9
162 ; CHECK-NONEON-NEXT: lsr x0, x8, #56
163 ; CHECK-NONEON-NEXT: ret
164 %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
; Tests whether the 64-bit population count of %x equals 1, with the i1
; comparison result zero-extended to i32.
; Both runs expect the full population count to be computed (vector form in
; the default run, scalar form with NEON disabled), followed by a compare
; against 1 and cset w0, eq.
168 define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
169 ; CHECK-LABEL: ctpop_eq_one:
171 ; CHECK-NEXT: fmov d0, x0
172 ; CHECK-NEXT: cnt.8b v0, v0
173 ; CHECK-NEXT: uaddlv.8b h0, v0
174 ; CHECK-NEXT: fmov w8, s0
175 ; CHECK-NEXT: cmp x8, #1 // =1
176 ; CHECK-NEXT: cset w0, eq
179 ; CHECK-NONEON-LABEL: ctpop_eq_one:
180 ; CHECK-NONEON: // %bb.0:
181 ; CHECK-NONEON-NEXT: lsr x8, x0, #1
182 ; CHECK-NONEON-NEXT: and x8, x8, #0x5555555555555555
183 ; CHECK-NONEON-NEXT: sub x8, x0, x8
184 ; CHECK-NONEON-NEXT: and x9, x8, #0x3333333333333333
185 ; CHECK-NONEON-NEXT: lsr x8, x8, #2
186 ; CHECK-NONEON-NEXT: and x8, x8, #0x3333333333333333
187 ; CHECK-NONEON-NEXT: add x8, x9, x8
188 ; CHECK-NONEON-NEXT: add x8, x8, x8, lsr #4
189 ; CHECK-NONEON-NEXT: and x8, x8, #0xf0f0f0f0f0f0f0f
190 ; CHECK-NONEON-NEXT: mov x9, #72340172838076673
191 ; CHECK-NONEON-NEXT: mul x8, x8, x9
192 ; CHECK-NONEON-NEXT: lsr x8, x8, #56
193 ; CHECK-NONEON-NEXT: cmp x8, #1 // =1
194 ; CHECK-NONEON-NEXT: cset w0, eq
195 ; CHECK-NONEON-NEXT: ret
196 %count = tail call i64 @llvm.ctpop.i64(i64 %x)
197 %cmp = icmp eq i64 %count, 1
198 %conv = zext i1 %cmp to i32
; Tests whether the 64-bit population count of %x differs from 1, with the
; i1 comparison result zero-extended to i32.
; Both runs expect the full population count to be computed (vector form in
; the default run, scalar form with NEON disabled), followed by a compare
; against 1 and cset w0, ne.
202 define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
203 ; CHECK-LABEL: ctpop_ne_one:
205 ; CHECK-NEXT: fmov d0, x0
206 ; CHECK-NEXT: cnt.8b v0, v0
207 ; CHECK-NEXT: uaddlv.8b h0, v0
208 ; CHECK-NEXT: fmov w8, s0
209 ; CHECK-NEXT: cmp x8, #1 // =1
210 ; CHECK-NEXT: cset w0, ne
213 ; CHECK-NONEON-LABEL: ctpop_ne_one:
214 ; CHECK-NONEON: // %bb.0:
215 ; CHECK-NONEON-NEXT: lsr x8, x0, #1
216 ; CHECK-NONEON-NEXT: and x8, x8, #0x5555555555555555
217 ; CHECK-NONEON-NEXT: sub x8, x0, x8
218 ; CHECK-NONEON-NEXT: and x9, x8, #0x3333333333333333
219 ; CHECK-NONEON-NEXT: lsr x8, x8, #2
220 ; CHECK-NONEON-NEXT: and x8, x8, #0x3333333333333333
221 ; CHECK-NONEON-NEXT: add x8, x9, x8
222 ; CHECK-NONEON-NEXT: add x8, x8, x8, lsr #4
223 ; CHECK-NONEON-NEXT: and x8, x8, #0xf0f0f0f0f0f0f0f
224 ; CHECK-NONEON-NEXT: mov x9, #72340172838076673
225 ; CHECK-NONEON-NEXT: mul x8, x8, x9
226 ; CHECK-NONEON-NEXT: lsr x8, x8, #56
227 ; CHECK-NONEON-NEXT: cmp x8, #1 // =1
228 ; CHECK-NONEON-NEXT: cset w0, ne
229 ; CHECK-NONEON-NEXT: ret
230 %count = tail call i64 @llvm.ctpop.i64(i64 %x)
231 %cmp = icmp ne i64 %count, 1
232 %conv = zext i1 %cmp to i32
; Declarations of the 32-bit and 64-bit population-count intrinsics called by
; the test functions in this file.
237 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
238 declare i64 @llvm.ctpop.i64(i64) nounwind readnone