1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
3 ; RUN: llc < %s -mtriple=aarch64-eabi -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s
4 ; RUN: llc < %s -mtriple=aarch64-eabi -mattr +cssc -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-CSSC %s
; cnt32_advsimd: population count of an i32 argument via @llvm.ctpop.i32.
; Expected code for each llc invocation at the top of this file:
;   default     - fmov the value into s0, then the AdvSIMD cnt.8b / uaddlv.8b
;                 pair, then fmov the result back into w0.
;   NEON off    - an integer-only shift/mask/add sequence that ends with a
;                 multiply by #16843009 (0x01010101) and lsr #24.
;   CSSC on     - a single general-purpose "cnt w0, w0".
6 define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
7 ; CHECK-LABEL: cnt32_advsimd:
9 ; CHECK-NEXT: fmov s0, w0
10 ; CHECK-NEXT: cnt.8b v0, v0
11 ; CHECK-NEXT: uaddlv.8b h0, v0
12 ; CHECK-NEXT: fmov w0, s0
15 ; CHECK-NONEON-LABEL: cnt32_advsimd:
16 ; CHECK-NONEON: // %bb.0:
17 ; CHECK-NONEON-NEXT: lsr w9, w0, #1
18 ; CHECK-NONEON-NEXT: mov w8, #16843009
19 ; CHECK-NONEON-NEXT: and w9, w9, #0x55555555
20 ; CHECK-NONEON-NEXT: sub w9, w0, w9
21 ; CHECK-NONEON-NEXT: lsr w10, w9, #2
22 ; CHECK-NONEON-NEXT: and w9, w9, #0x33333333
23 ; CHECK-NONEON-NEXT: and w10, w10, #0x33333333
24 ; CHECK-NONEON-NEXT: add w9, w9, w10
25 ; CHECK-NONEON-NEXT: add w9, w9, w9, lsr #4
26 ; CHECK-NONEON-NEXT: and w9, w9, #0xf0f0f0f
27 ; CHECK-NONEON-NEXT: mul w8, w9, w8
28 ; CHECK-NONEON-NEXT: lsr w0, w8, #24
29 ; CHECK-NONEON-NEXT: ret
31 ; CHECK-CSSC-LABEL: cnt32_advsimd:
32 ; CHECK-CSSC: // %bb.0:
33 ; CHECK-CSSC-NEXT: cnt w0, w0
34 ; CHECK-CSSC-NEXT: ret
35 %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
; cnt32_advsimd_2: population count of element 0 of a <2 x i32> argument.
; The IR extracts lane 0 and passes it to @llvm.ctpop.i32.
; Expected code for each llc invocation at the top of this file:
;   default     - the lane is moved s0 -> w8 -> s0 before the cnt.8b /
;                 uaddlv.8b pair, and the result is moved back into w0.
;   NEON off    - the same integer-only sequence as the scalar i32 case,
;                 reading its input from w0.
;   CSSC on     - fmov the lane into w8, then "cnt w0, w8".
39 define i32 @cnt32_advsimd_2(<2 x i32> %x) {
40 ; CHECK-LABEL: cnt32_advsimd_2:
42 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
43 ; CHECK-NEXT: fmov w8, s0
44 ; CHECK-NEXT: fmov s0, w8
45 ; CHECK-NEXT: cnt.8b v0, v0
46 ; CHECK-NEXT: uaddlv.8b h0, v0
47 ; CHECK-NEXT: fmov w0, s0
50 ; CHECK-NONEON-LABEL: cnt32_advsimd_2:
51 ; CHECK-NONEON: // %bb.0:
52 ; CHECK-NONEON-NEXT: lsr w9, w0, #1
53 ; CHECK-NONEON-NEXT: mov w8, #16843009
54 ; CHECK-NONEON-NEXT: and w9, w9, #0x55555555
55 ; CHECK-NONEON-NEXT: sub w9, w0, w9
56 ; CHECK-NONEON-NEXT: lsr w10, w9, #2
57 ; CHECK-NONEON-NEXT: and w9, w9, #0x33333333
58 ; CHECK-NONEON-NEXT: and w10, w10, #0x33333333
59 ; CHECK-NONEON-NEXT: add w9, w9, w10
60 ; CHECK-NONEON-NEXT: add w9, w9, w9, lsr #4
61 ; CHECK-NONEON-NEXT: and w9, w9, #0xf0f0f0f
62 ; CHECK-NONEON-NEXT: mul w8, w9, w8
63 ; CHECK-NONEON-NEXT: lsr w0, w8, #24
64 ; CHECK-NONEON-NEXT: ret
66 ; CHECK-CSSC-LABEL: cnt32_advsimd_2:
67 ; CHECK-CSSC: // %bb.0:
68 ; CHECK-CSSC-NEXT: // kill: def $d0 killed $d0 def $q0
69 ; CHECK-CSSC-NEXT: fmov w8, s0
70 ; CHECK-CSSC-NEXT: cnt w0, w8
71 ; CHECK-CSSC-NEXT: ret
72 %1 = extractelement <2 x i32> %x, i64 0
73 %2 = tail call i32 @llvm.ctpop.i32(i32 %1)
; cnt64_advsimd: population count of an i64 argument via @llvm.ctpop.i64.
; Expected code for each llc invocation at the top of this file:
;   default     - fmov the value into d0, then the AdvSIMD cnt.8b / uaddlv.8b
;                 pair; the result is moved back with "fmov w0, s0".
;   NEON off    - the 64-bit form of the integer-only sequence, ending with a
;                 multiply by #72340172838076673 (0x0101010101010101) and
;                 lsr #56.
;   CSSC on     - a single general-purpose "cnt x0, x0".
77 define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
78 ; CHECK-LABEL: cnt64_advsimd:
80 ; CHECK-NEXT: fmov d0, x0
81 ; CHECK-NEXT: cnt.8b v0, v0
82 ; CHECK-NEXT: uaddlv.8b h0, v0
83 ; CHECK-NEXT: fmov w0, s0
86 ; CHECK-NONEON-LABEL: cnt64_advsimd:
87 ; CHECK-NONEON: // %bb.0:
88 ; CHECK-NONEON-NEXT: lsr x9, x0, #1
89 ; CHECK-NONEON-NEXT: mov x8, #72340172838076673
90 ; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555
91 ; CHECK-NONEON-NEXT: sub x9, x0, x9
92 ; CHECK-NONEON-NEXT: lsr x10, x9, #2
93 ; CHECK-NONEON-NEXT: and x9, x9, #0x3333333333333333
94 ; CHECK-NONEON-NEXT: and x10, x10, #0x3333333333333333
95 ; CHECK-NONEON-NEXT: add x9, x9, x10
96 ; CHECK-NONEON-NEXT: add x9, x9, x9, lsr #4
97 ; CHECK-NONEON-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
98 ; CHECK-NONEON-NEXT: mul x8, x9, x8
99 ; CHECK-NONEON-NEXT: lsr x0, x8, #56
100 ; CHECK-NONEON-NEXT: ret
102 ; CHECK-CSSC-LABEL: cnt64_advsimd:
103 ; CHECK-CSSC: // %bb.0:
104 ; CHECK-CSSC-NEXT: cnt x0, x0
105 ; CHECK-CSSC-NEXT: ret
106 %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
110 ; Do not use AdvSIMD when -mno-implicit-float is specified.
; cnt32: i32 population count in a function marked noimplicitfloat.
; With that attribute the default llc invocation is expected to emit the same
; integer-only shift/mask/multiply sequence as the NEON-off invocation, with no
; cnt.8b / uaddlv.8b. The CSSC invocation still expects a single
; general-purpose "cnt w0, w0".
113 define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
114 ; CHECK-LABEL: cnt32:
116 ; CHECK-NEXT: lsr w9, w0, #1
117 ; CHECK-NEXT: mov w8, #16843009
118 ; CHECK-NEXT: and w9, w9, #0x55555555
119 ; CHECK-NEXT: sub w9, w0, w9
120 ; CHECK-NEXT: lsr w10, w9, #2
121 ; CHECK-NEXT: and w9, w9, #0x33333333
122 ; CHECK-NEXT: and w10, w10, #0x33333333
123 ; CHECK-NEXT: add w9, w9, w10
124 ; CHECK-NEXT: add w9, w9, w9, lsr #4
125 ; CHECK-NEXT: and w9, w9, #0xf0f0f0f
126 ; CHECK-NEXT: mul w8, w9, w8
127 ; CHECK-NEXT: lsr w0, w8, #24
130 ; CHECK-NONEON-LABEL: cnt32:
131 ; CHECK-NONEON: // %bb.0:
132 ; CHECK-NONEON-NEXT: lsr w9, w0, #1
133 ; CHECK-NONEON-NEXT: mov w8, #16843009
134 ; CHECK-NONEON-NEXT: and w9, w9, #0x55555555
135 ; CHECK-NONEON-NEXT: sub w9, w0, w9
136 ; CHECK-NONEON-NEXT: lsr w10, w9, #2
137 ; CHECK-NONEON-NEXT: and w9, w9, #0x33333333
138 ; CHECK-NONEON-NEXT: and w10, w10, #0x33333333
139 ; CHECK-NONEON-NEXT: add w9, w9, w10
140 ; CHECK-NONEON-NEXT: add w9, w9, w9, lsr #4
141 ; CHECK-NONEON-NEXT: and w9, w9, #0xf0f0f0f
142 ; CHECK-NONEON-NEXT: mul w8, w9, w8
143 ; CHECK-NONEON-NEXT: lsr w0, w8, #24
144 ; CHECK-NONEON-NEXT: ret
146 ; CHECK-CSSC-LABEL: cnt32:
147 ; CHECK-CSSC: // %bb.0:
148 ; CHECK-CSSC-NEXT: cnt w0, w0
149 ; CHECK-CSSC-NEXT: ret
150 %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
; cnt64: i64 population count in a function marked noimplicitfloat.
; With that attribute the default llc invocation is expected to emit the same
; 64-bit integer-only sequence as the NEON-off invocation (multiply by
; #72340172838076673, i.e. 0x0101010101010101, then lsr #56), with no
; cnt.8b / uaddlv.8b. The CSSC invocation still expects a single
; general-purpose "cnt x0, x0".
154 define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
155 ; CHECK-LABEL: cnt64:
157 ; CHECK-NEXT: lsr x9, x0, #1
158 ; CHECK-NEXT: mov x8, #72340172838076673
159 ; CHECK-NEXT: and x9, x9, #0x5555555555555555
160 ; CHECK-NEXT: sub x9, x0, x9
161 ; CHECK-NEXT: lsr x10, x9, #2
162 ; CHECK-NEXT: and x9, x9, #0x3333333333333333
163 ; CHECK-NEXT: and x10, x10, #0x3333333333333333
164 ; CHECK-NEXT: add x9, x9, x10
165 ; CHECK-NEXT: add x9, x9, x9, lsr #4
166 ; CHECK-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
167 ; CHECK-NEXT: mul x8, x9, x8
168 ; CHECK-NEXT: lsr x0, x8, #56
171 ; CHECK-NONEON-LABEL: cnt64:
172 ; CHECK-NONEON: // %bb.0:
173 ; CHECK-NONEON-NEXT: lsr x9, x0, #1
174 ; CHECK-NONEON-NEXT: mov x8, #72340172838076673
175 ; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555
176 ; CHECK-NONEON-NEXT: sub x9, x0, x9
177 ; CHECK-NONEON-NEXT: lsr x10, x9, #2
178 ; CHECK-NONEON-NEXT: and x9, x9, #0x3333333333333333
179 ; CHECK-NONEON-NEXT: and x10, x10, #0x3333333333333333
180 ; CHECK-NONEON-NEXT: add x9, x9, x10
181 ; CHECK-NONEON-NEXT: add x9, x9, x9, lsr #4
182 ; CHECK-NONEON-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
183 ; CHECK-NONEON-NEXT: mul x8, x9, x8
184 ; CHECK-NONEON-NEXT: lsr x0, x8, #56
185 ; CHECK-NONEON-NEXT: ret
187 ; CHECK-CSSC-LABEL: cnt64:
188 ; CHECK-CSSC: // %bb.0:
189 ; CHECK-CSSC-NEXT: cnt x0, x0
190 ; CHECK-CSSC-NEXT: ret
191 %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
; ctpop_eq_one: computes (ctpop.i64(%x) == 1) and zero-extends the i1 to i32.
; Expected code for each llc invocation at the top of this file:
;   default and NEON off - no population count is emitted; the comparison is
;                 expected as "sub x8, x0, #1" / "tst x0, x8" /
;                 "ccmp x0, #0, #4, eq" / "cset w0, ne".
;   CSSC on     - "cnt x8, x0", compare against #1, then "cset w0, eq".
195 define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
196 ; CHECK-LABEL: ctpop_eq_one:
198 ; CHECK-NEXT: sub x8, x0, #1
199 ; CHECK-NEXT: tst x0, x8
200 ; CHECK-NEXT: ccmp x0, #0, #4, eq
201 ; CHECK-NEXT: cset w0, ne
204 ; CHECK-NONEON-LABEL: ctpop_eq_one:
205 ; CHECK-NONEON: // %bb.0:
206 ; CHECK-NONEON-NEXT: sub x8, x0, #1
207 ; CHECK-NONEON-NEXT: tst x0, x8
208 ; CHECK-NONEON-NEXT: ccmp x0, #0, #4, eq
209 ; CHECK-NONEON-NEXT: cset w0, ne
210 ; CHECK-NONEON-NEXT: ret
212 ; CHECK-CSSC-LABEL: ctpop_eq_one:
213 ; CHECK-CSSC: // %bb.0:
214 ; CHECK-CSSC-NEXT: cnt x8, x0
215 ; CHECK-CSSC-NEXT: cmp x8, #1
216 ; CHECK-CSSC-NEXT: cset w0, eq
217 ; CHECK-CSSC-NEXT: ret
218 %count = tail call i64 @llvm.ctpop.i64(i64 %x)
219 %cmp = icmp eq i64 %count, 1
220 %conv = zext i1 %cmp to i32
; ctpop_ne_one: computes (ctpop.i64(%x) != 1) and zero-extends the i1 to i32.
; Expected code for each llc invocation at the top of this file:
;   default and NEON off - no population count is emitted; the comparison is
;                 expected as "sub x8, x0, #1" / "tst x0, x8" /
;                 "ccmp x0, #0, #4, eq" / "cset w0, eq".
;   CSSC on     - "cnt x8, x0", compare against #1, then "cset w0, ne".
224 define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
225 ; CHECK-LABEL: ctpop_ne_one:
227 ; CHECK-NEXT: sub x8, x0, #1
228 ; CHECK-NEXT: tst x0, x8
229 ; CHECK-NEXT: ccmp x0, #0, #4, eq
230 ; CHECK-NEXT: cset w0, eq
233 ; CHECK-NONEON-LABEL: ctpop_ne_one:
234 ; CHECK-NONEON: // %bb.0:
235 ; CHECK-NONEON-NEXT: sub x8, x0, #1
236 ; CHECK-NONEON-NEXT: tst x0, x8
237 ; CHECK-NONEON-NEXT: ccmp x0, #0, #4, eq
238 ; CHECK-NONEON-NEXT: cset w0, eq
239 ; CHECK-NONEON-NEXT: ret
241 ; CHECK-CSSC-LABEL: ctpop_ne_one:
242 ; CHECK-CSSC: // %bb.0:
243 ; CHECK-CSSC-NEXT: cnt x8, x0
244 ; CHECK-CSSC-NEXT: cmp x8, #1
245 ; CHECK-CSSC-NEXT: cset w0, ne
246 ; CHECK-CSSC-NEXT: ret
247 %count = tail call i64 @llvm.ctpop.i64(i64 %x)
248 %cmp = icmp ne i64 %count, 1
249 %conv = zext i1 %cmp to i32
; ctpop32_ne_one: 32-bit variant; computes (ctpop.i32(%x) != 1) in a function
; whose return type is i1.
; Expected code for each llc invocation at the top of this file:
;   default and NEON off - no population count is emitted; the comparison is
;                 expected as "sub w8, w0, #1" / "tst w0, w8" /
;                 "ccmp w0, #0, #4, eq" / "cset w0, eq".
;   CSSC on     - "cnt w8, w0", compare against #1, then "cset w0, ne".
253 define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
254 ; CHECK-LABEL: ctpop32_ne_one:
256 ; CHECK-NEXT: sub w8, w0, #1
257 ; CHECK-NEXT: tst w0, w8
258 ; CHECK-NEXT: ccmp w0, #0, #4, eq
259 ; CHECK-NEXT: cset w0, eq
262 ; CHECK-NONEON-LABEL: ctpop32_ne_one:
263 ; CHECK-NONEON: // %bb.0:
264 ; CHECK-NONEON-NEXT: sub w8, w0, #1
265 ; CHECK-NONEON-NEXT: tst w0, w8
266 ; CHECK-NONEON-NEXT: ccmp w0, #0, #4, eq
267 ; CHECK-NONEON-NEXT: cset w0, eq
268 ; CHECK-NONEON-NEXT: ret
270 ; CHECK-CSSC-LABEL: ctpop32_ne_one:
271 ; CHECK-CSSC: // %bb.0:
272 ; CHECK-CSSC-NEXT: cnt w8, w0
273 ; CHECK-CSSC-NEXT: cmp w8, #1
274 ; CHECK-CSSC-NEXT: cset w0, ne
275 ; CHECK-CSSC-NEXT: ret
276 %count = tail call i32 @llvm.ctpop.i32(i32 %x)
277 %cmp = icmp ne i32 %count, 1
; Population-count intrinsics (32-bit and 64-bit) called by the test functions
; in this file.
281 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
282 declare i64 @llvm.ctpop.i64(i64) nounwind readnone