1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
4 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
5 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
7 define <vscale x 1 x i8> @ctpop_nxv1i8(<vscale x 1 x i8> %va) {
8 ; CHECK-LABEL: ctpop_nxv1i8:
10 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
11 ; CHECK-NEXT: vsrl.vi v9, v8, 1
12 ; CHECK-NEXT: li a0, 85
13 ; CHECK-NEXT: vand.vx v9, v9, a0
14 ; CHECK-NEXT: vsub.vv v8, v8, v9
15 ; CHECK-NEXT: li a0, 51
16 ; CHECK-NEXT: vand.vx v9, v8, a0
17 ; CHECK-NEXT: vsrl.vi v8, v8, 2
18 ; CHECK-NEXT: vand.vx v8, v8, a0
19 ; CHECK-NEXT: vadd.vv v8, v9, v8
20 ; CHECK-NEXT: vsrl.vi v9, v8, 4
21 ; CHECK-NEXT: vadd.vv v8, v8, v9
22 ; CHECK-NEXT: vand.vi v8, v8, 15
25 ; CHECK-ZVBB-LABEL: ctpop_nxv1i8:
26 ; CHECK-ZVBB: # %bb.0:
27 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
28 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
29 ; CHECK-ZVBB-NEXT: ret
30 %a = call <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8> %va)
31 ret <vscale x 1 x i8> %a
33 declare <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8>)
35 define <vscale x 2 x i8> @ctpop_nxv2i8(<vscale x 2 x i8> %va) {
36 ; CHECK-LABEL: ctpop_nxv2i8:
38 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
39 ; CHECK-NEXT: vsrl.vi v9, v8, 1
40 ; CHECK-NEXT: li a0, 85
41 ; CHECK-NEXT: vand.vx v9, v9, a0
42 ; CHECK-NEXT: vsub.vv v8, v8, v9
43 ; CHECK-NEXT: li a0, 51
44 ; CHECK-NEXT: vand.vx v9, v8, a0
45 ; CHECK-NEXT: vsrl.vi v8, v8, 2
46 ; CHECK-NEXT: vand.vx v8, v8, a0
47 ; CHECK-NEXT: vadd.vv v8, v9, v8
48 ; CHECK-NEXT: vsrl.vi v9, v8, 4
49 ; CHECK-NEXT: vadd.vv v8, v8, v9
50 ; CHECK-NEXT: vand.vi v8, v8, 15
53 ; CHECK-ZVBB-LABEL: ctpop_nxv2i8:
54 ; CHECK-ZVBB: # %bb.0:
55 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
56 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
57 ; CHECK-ZVBB-NEXT: ret
58 %a = call <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8> %va)
59 ret <vscale x 2 x i8> %a
61 declare <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8>)
63 define <vscale x 4 x i8> @ctpop_nxv4i8(<vscale x 4 x i8> %va) {
64 ; CHECK-LABEL: ctpop_nxv4i8:
66 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
67 ; CHECK-NEXT: vsrl.vi v9, v8, 1
68 ; CHECK-NEXT: li a0, 85
69 ; CHECK-NEXT: vand.vx v9, v9, a0
70 ; CHECK-NEXT: vsub.vv v8, v8, v9
71 ; CHECK-NEXT: li a0, 51
72 ; CHECK-NEXT: vand.vx v9, v8, a0
73 ; CHECK-NEXT: vsrl.vi v8, v8, 2
74 ; CHECK-NEXT: vand.vx v8, v8, a0
75 ; CHECK-NEXT: vadd.vv v8, v9, v8
76 ; CHECK-NEXT: vsrl.vi v9, v8, 4
77 ; CHECK-NEXT: vadd.vv v8, v8, v9
78 ; CHECK-NEXT: vand.vi v8, v8, 15
81 ; CHECK-ZVBB-LABEL: ctpop_nxv4i8:
82 ; CHECK-ZVBB: # %bb.0:
83 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
84 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
85 ; CHECK-ZVBB-NEXT: ret
86 %a = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> %va)
87 ret <vscale x 4 x i8> %a
89 declare <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8>)
91 define <vscale x 8 x i8> @ctpop_nxv8i8(<vscale x 8 x i8> %va) {
92 ; CHECK-LABEL: ctpop_nxv8i8:
94 ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
95 ; CHECK-NEXT: vsrl.vi v9, v8, 1
96 ; CHECK-NEXT: li a0, 85
97 ; CHECK-NEXT: vand.vx v9, v9, a0
98 ; CHECK-NEXT: vsub.vv v8, v8, v9
99 ; CHECK-NEXT: li a0, 51
100 ; CHECK-NEXT: vand.vx v9, v8, a0
101 ; CHECK-NEXT: vsrl.vi v8, v8, 2
102 ; CHECK-NEXT: vand.vx v8, v8, a0
103 ; CHECK-NEXT: vadd.vv v8, v9, v8
104 ; CHECK-NEXT: vsrl.vi v9, v8, 4
105 ; CHECK-NEXT: vadd.vv v8, v8, v9
106 ; CHECK-NEXT: vand.vi v8, v8, 15
109 ; CHECK-ZVBB-LABEL: ctpop_nxv8i8:
110 ; CHECK-ZVBB: # %bb.0:
111 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma
112 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
113 ; CHECK-ZVBB-NEXT: ret
114 %a = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> %va)
115 ret <vscale x 8 x i8> %a
117 declare <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8>)
119 define <vscale x 16 x i8> @ctpop_nxv16i8(<vscale x 16 x i8> %va) {
120 ; CHECK-LABEL: ctpop_nxv16i8:
122 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
123 ; CHECK-NEXT: vsrl.vi v10, v8, 1
124 ; CHECK-NEXT: li a0, 85
125 ; CHECK-NEXT: vand.vx v10, v10, a0
126 ; CHECK-NEXT: vsub.vv v8, v8, v10
127 ; CHECK-NEXT: li a0, 51
128 ; CHECK-NEXT: vand.vx v10, v8, a0
129 ; CHECK-NEXT: vsrl.vi v8, v8, 2
130 ; CHECK-NEXT: vand.vx v8, v8, a0
131 ; CHECK-NEXT: vadd.vv v8, v10, v8
132 ; CHECK-NEXT: vsrl.vi v10, v8, 4
133 ; CHECK-NEXT: vadd.vv v8, v8, v10
134 ; CHECK-NEXT: vand.vi v8, v8, 15
137 ; CHECK-ZVBB-LABEL: ctpop_nxv16i8:
138 ; CHECK-ZVBB: # %bb.0:
139 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma
140 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
141 ; CHECK-ZVBB-NEXT: ret
142 %a = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> %va)
143 ret <vscale x 16 x i8> %a
145 declare <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8>)
147 define <vscale x 32 x i8> @ctpop_nxv32i8(<vscale x 32 x i8> %va) {
148 ; CHECK-LABEL: ctpop_nxv32i8:
150 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
151 ; CHECK-NEXT: vsrl.vi v12, v8, 1
152 ; CHECK-NEXT: li a0, 85
153 ; CHECK-NEXT: vand.vx v12, v12, a0
154 ; CHECK-NEXT: vsub.vv v8, v8, v12
155 ; CHECK-NEXT: li a0, 51
156 ; CHECK-NEXT: vand.vx v12, v8, a0
157 ; CHECK-NEXT: vsrl.vi v8, v8, 2
158 ; CHECK-NEXT: vand.vx v8, v8, a0
159 ; CHECK-NEXT: vadd.vv v8, v12, v8
160 ; CHECK-NEXT: vsrl.vi v12, v8, 4
161 ; CHECK-NEXT: vadd.vv v8, v8, v12
162 ; CHECK-NEXT: vand.vi v8, v8, 15
165 ; CHECK-ZVBB-LABEL: ctpop_nxv32i8:
166 ; CHECK-ZVBB: # %bb.0:
167 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m4, ta, ma
168 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
169 ; CHECK-ZVBB-NEXT: ret
170 %a = call <vscale x 32 x i8> @llvm.ctpop.nxv32i8(<vscale x 32 x i8> %va)
171 ret <vscale x 32 x i8> %a
173 declare <vscale x 32 x i8> @llvm.ctpop.nxv32i8(<vscale x 32 x i8>)
175 define <vscale x 64 x i8> @ctpop_nxv64i8(<vscale x 64 x i8> %va) {
176 ; CHECK-LABEL: ctpop_nxv64i8:
178 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
179 ; CHECK-NEXT: vsrl.vi v16, v8, 1
180 ; CHECK-NEXT: li a0, 85
181 ; CHECK-NEXT: vand.vx v16, v16, a0
182 ; CHECK-NEXT: vsub.vv v8, v8, v16
183 ; CHECK-NEXT: li a0, 51
184 ; CHECK-NEXT: vand.vx v16, v8, a0
185 ; CHECK-NEXT: vsrl.vi v8, v8, 2
186 ; CHECK-NEXT: vand.vx v8, v8, a0
187 ; CHECK-NEXT: vadd.vv v8, v16, v8
188 ; CHECK-NEXT: vsrl.vi v16, v8, 4
189 ; CHECK-NEXT: vadd.vv v8, v8, v16
190 ; CHECK-NEXT: vand.vi v8, v8, 15
193 ; CHECK-ZVBB-LABEL: ctpop_nxv64i8:
194 ; CHECK-ZVBB: # %bb.0:
195 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m8, ta, ma
196 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
197 ; CHECK-ZVBB-NEXT: ret
198 %a = call <vscale x 64 x i8> @llvm.ctpop.nxv64i8(<vscale x 64 x i8> %va)
199 ret <vscale x 64 x i8> %a
201 declare <vscale x 64 x i8> @llvm.ctpop.nxv64i8(<vscale x 64 x i8>)
203 define <vscale x 1 x i16> @ctpop_nxv1i16(<vscale x 1 x i16> %va) {
204 ; CHECK-LABEL: ctpop_nxv1i16:
206 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
207 ; CHECK-NEXT: vsrl.vi v9, v8, 1
208 ; CHECK-NEXT: lui a0, 5
209 ; CHECK-NEXT: addi a0, a0, 1365
210 ; CHECK-NEXT: vand.vx v9, v9, a0
211 ; CHECK-NEXT: vsub.vv v8, v8, v9
212 ; CHECK-NEXT: lui a0, 3
213 ; CHECK-NEXT: addi a0, a0, 819
214 ; CHECK-NEXT: vand.vx v9, v8, a0
215 ; CHECK-NEXT: vsrl.vi v8, v8, 2
216 ; CHECK-NEXT: vand.vx v8, v8, a0
217 ; CHECK-NEXT: vadd.vv v8, v9, v8
218 ; CHECK-NEXT: vsrl.vi v9, v8, 4
219 ; CHECK-NEXT: vadd.vv v8, v8, v9
220 ; CHECK-NEXT: lui a0, 1
221 ; CHECK-NEXT: addi a0, a0, -241
222 ; CHECK-NEXT: vand.vx v8, v8, a0
223 ; CHECK-NEXT: li a0, 257
224 ; CHECK-NEXT: vmul.vx v8, v8, a0
225 ; CHECK-NEXT: vsrl.vi v8, v8, 8
228 ; CHECK-ZVBB-LABEL: ctpop_nxv1i16:
229 ; CHECK-ZVBB: # %bb.0:
230 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
231 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
232 ; CHECK-ZVBB-NEXT: ret
233 %a = call <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16> %va)
234 ret <vscale x 1 x i16> %a
236 declare <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16>)
238 define <vscale x 2 x i16> @ctpop_nxv2i16(<vscale x 2 x i16> %va) {
239 ; CHECK-LABEL: ctpop_nxv2i16:
241 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
242 ; CHECK-NEXT: vsrl.vi v9, v8, 1
243 ; CHECK-NEXT: lui a0, 5
244 ; CHECK-NEXT: addi a0, a0, 1365
245 ; CHECK-NEXT: vand.vx v9, v9, a0
246 ; CHECK-NEXT: vsub.vv v8, v8, v9
247 ; CHECK-NEXT: lui a0, 3
248 ; CHECK-NEXT: addi a0, a0, 819
249 ; CHECK-NEXT: vand.vx v9, v8, a0
250 ; CHECK-NEXT: vsrl.vi v8, v8, 2
251 ; CHECK-NEXT: vand.vx v8, v8, a0
252 ; CHECK-NEXT: vadd.vv v8, v9, v8
253 ; CHECK-NEXT: vsrl.vi v9, v8, 4
254 ; CHECK-NEXT: vadd.vv v8, v8, v9
255 ; CHECK-NEXT: lui a0, 1
256 ; CHECK-NEXT: addi a0, a0, -241
257 ; CHECK-NEXT: vand.vx v8, v8, a0
258 ; CHECK-NEXT: li a0, 257
259 ; CHECK-NEXT: vmul.vx v8, v8, a0
260 ; CHECK-NEXT: vsrl.vi v8, v8, 8
263 ; CHECK-ZVBB-LABEL: ctpop_nxv2i16:
264 ; CHECK-ZVBB: # %bb.0:
265 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
266 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
267 ; CHECK-ZVBB-NEXT: ret
268 %a = call <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16> %va)
269 ret <vscale x 2 x i16> %a
271 declare <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16>)
273 define <vscale x 4 x i16> @ctpop_nxv4i16(<vscale x 4 x i16> %va) {
274 ; CHECK-LABEL: ctpop_nxv4i16:
276 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
277 ; CHECK-NEXT: vsrl.vi v9, v8, 1
278 ; CHECK-NEXT: lui a0, 5
279 ; CHECK-NEXT: addi a0, a0, 1365
280 ; CHECK-NEXT: vand.vx v9, v9, a0
281 ; CHECK-NEXT: vsub.vv v8, v8, v9
282 ; CHECK-NEXT: lui a0, 3
283 ; CHECK-NEXT: addi a0, a0, 819
284 ; CHECK-NEXT: vand.vx v9, v8, a0
285 ; CHECK-NEXT: vsrl.vi v8, v8, 2
286 ; CHECK-NEXT: vand.vx v8, v8, a0
287 ; CHECK-NEXT: vadd.vv v8, v9, v8
288 ; CHECK-NEXT: vsrl.vi v9, v8, 4
289 ; CHECK-NEXT: vadd.vv v8, v8, v9
290 ; CHECK-NEXT: lui a0, 1
291 ; CHECK-NEXT: addi a0, a0, -241
292 ; CHECK-NEXT: vand.vx v8, v8, a0
293 ; CHECK-NEXT: li a0, 257
294 ; CHECK-NEXT: vmul.vx v8, v8, a0
295 ; CHECK-NEXT: vsrl.vi v8, v8, 8
298 ; CHECK-ZVBB-LABEL: ctpop_nxv4i16:
299 ; CHECK-ZVBB: # %bb.0:
300 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
301 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
302 ; CHECK-ZVBB-NEXT: ret
303 %a = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> %va)
304 ret <vscale x 4 x i16> %a
306 declare <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16>)
308 define <vscale x 8 x i16> @ctpop_nxv8i16(<vscale x 8 x i16> %va) {
309 ; CHECK-LABEL: ctpop_nxv8i16:
311 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
312 ; CHECK-NEXT: vsrl.vi v10, v8, 1
313 ; CHECK-NEXT: lui a0, 5
314 ; CHECK-NEXT: addi a0, a0, 1365
315 ; CHECK-NEXT: vand.vx v10, v10, a0
316 ; CHECK-NEXT: vsub.vv v8, v8, v10
317 ; CHECK-NEXT: lui a0, 3
318 ; CHECK-NEXT: addi a0, a0, 819
319 ; CHECK-NEXT: vand.vx v10, v8, a0
320 ; CHECK-NEXT: vsrl.vi v8, v8, 2
321 ; CHECK-NEXT: vand.vx v8, v8, a0
322 ; CHECK-NEXT: vadd.vv v8, v10, v8
323 ; CHECK-NEXT: vsrl.vi v10, v8, 4
324 ; CHECK-NEXT: vadd.vv v8, v8, v10
325 ; CHECK-NEXT: lui a0, 1
326 ; CHECK-NEXT: addi a0, a0, -241
327 ; CHECK-NEXT: vand.vx v8, v8, a0
328 ; CHECK-NEXT: li a0, 257
329 ; CHECK-NEXT: vmul.vx v8, v8, a0
330 ; CHECK-NEXT: vsrl.vi v8, v8, 8
333 ; CHECK-ZVBB-LABEL: ctpop_nxv8i16:
334 ; CHECK-ZVBB: # %bb.0:
335 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m2, ta, ma
336 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
337 ; CHECK-ZVBB-NEXT: ret
338 %a = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> %va)
339 ret <vscale x 8 x i16> %a
341 declare <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16>)
343 define <vscale x 16 x i16> @ctpop_nxv16i16(<vscale x 16 x i16> %va) {
344 ; CHECK-LABEL: ctpop_nxv16i16:
346 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
347 ; CHECK-NEXT: vsrl.vi v12, v8, 1
348 ; CHECK-NEXT: lui a0, 5
349 ; CHECK-NEXT: addi a0, a0, 1365
350 ; CHECK-NEXT: vand.vx v12, v12, a0
351 ; CHECK-NEXT: vsub.vv v8, v8, v12
352 ; CHECK-NEXT: lui a0, 3
353 ; CHECK-NEXT: addi a0, a0, 819
354 ; CHECK-NEXT: vand.vx v12, v8, a0
355 ; CHECK-NEXT: vsrl.vi v8, v8, 2
356 ; CHECK-NEXT: vand.vx v8, v8, a0
357 ; CHECK-NEXT: vadd.vv v8, v12, v8
358 ; CHECK-NEXT: vsrl.vi v12, v8, 4
359 ; CHECK-NEXT: vadd.vv v8, v8, v12
360 ; CHECK-NEXT: lui a0, 1
361 ; CHECK-NEXT: addi a0, a0, -241
362 ; CHECK-NEXT: vand.vx v8, v8, a0
363 ; CHECK-NEXT: li a0, 257
364 ; CHECK-NEXT: vmul.vx v8, v8, a0
365 ; CHECK-NEXT: vsrl.vi v8, v8, 8
368 ; CHECK-ZVBB-LABEL: ctpop_nxv16i16:
369 ; CHECK-ZVBB: # %bb.0:
370 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m4, ta, ma
371 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
372 ; CHECK-ZVBB-NEXT: ret
373 %a = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> %va)
374 ret <vscale x 16 x i16> %a
376 declare <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16>)
378 define <vscale x 32 x i16> @ctpop_nxv32i16(<vscale x 32 x i16> %va) {
379 ; CHECK-LABEL: ctpop_nxv32i16:
381 ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
382 ; CHECK-NEXT: vsrl.vi v16, v8, 1
383 ; CHECK-NEXT: lui a0, 5
384 ; CHECK-NEXT: addi a0, a0, 1365
385 ; CHECK-NEXT: vand.vx v16, v16, a0
386 ; CHECK-NEXT: vsub.vv v8, v8, v16
387 ; CHECK-NEXT: lui a0, 3
388 ; CHECK-NEXT: addi a0, a0, 819
389 ; CHECK-NEXT: vand.vx v16, v8, a0
390 ; CHECK-NEXT: vsrl.vi v8, v8, 2
391 ; CHECK-NEXT: vand.vx v8, v8, a0
392 ; CHECK-NEXT: vadd.vv v8, v16, v8
393 ; CHECK-NEXT: vsrl.vi v16, v8, 4
394 ; CHECK-NEXT: vadd.vv v8, v8, v16
395 ; CHECK-NEXT: lui a0, 1
396 ; CHECK-NEXT: addi a0, a0, -241
397 ; CHECK-NEXT: vand.vx v8, v8, a0
398 ; CHECK-NEXT: li a0, 257
399 ; CHECK-NEXT: vmul.vx v8, v8, a0
400 ; CHECK-NEXT: vsrl.vi v8, v8, 8
403 ; CHECK-ZVBB-LABEL: ctpop_nxv32i16:
404 ; CHECK-ZVBB: # %bb.0:
405 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m8, ta, ma
406 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
407 ; CHECK-ZVBB-NEXT: ret
408 %a = call <vscale x 32 x i16> @llvm.ctpop.nxv32i16(<vscale x 32 x i16> %va)
409 ret <vscale x 32 x i16> %a
411 declare <vscale x 32 x i16> @llvm.ctpop.nxv32i16(<vscale x 32 x i16>)
413 define <vscale x 1 x i32> @ctpop_nxv1i32(<vscale x 1 x i32> %va) {
414 ; CHECK-LABEL: ctpop_nxv1i32:
416 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
417 ; CHECK-NEXT: vsrl.vi v9, v8, 1
418 ; CHECK-NEXT: lui a0, 349525
419 ; CHECK-NEXT: addi a0, a0, 1365
420 ; CHECK-NEXT: vand.vx v9, v9, a0
421 ; CHECK-NEXT: vsub.vv v8, v8, v9
422 ; CHECK-NEXT: lui a0, 209715
423 ; CHECK-NEXT: addi a0, a0, 819
424 ; CHECK-NEXT: vand.vx v9, v8, a0
425 ; CHECK-NEXT: vsrl.vi v8, v8, 2
426 ; CHECK-NEXT: vand.vx v8, v8, a0
427 ; CHECK-NEXT: vadd.vv v8, v9, v8
428 ; CHECK-NEXT: vsrl.vi v9, v8, 4
429 ; CHECK-NEXT: vadd.vv v8, v8, v9
430 ; CHECK-NEXT: lui a0, 61681
431 ; CHECK-NEXT: addi a0, a0, -241
432 ; CHECK-NEXT: vand.vx v8, v8, a0
433 ; CHECK-NEXT: lui a0, 4112
434 ; CHECK-NEXT: addi a0, a0, 257
435 ; CHECK-NEXT: vmul.vx v8, v8, a0
436 ; CHECK-NEXT: vsrl.vi v8, v8, 24
439 ; CHECK-ZVBB-LABEL: ctpop_nxv1i32:
440 ; CHECK-ZVBB: # %bb.0:
441 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
442 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
443 ; CHECK-ZVBB-NEXT: ret
444 %a = call <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32> %va)
445 ret <vscale x 1 x i32> %a
447 declare <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32>)
449 define <vscale x 2 x i32> @ctpop_nxv2i32(<vscale x 2 x i32> %va) {
450 ; CHECK-LABEL: ctpop_nxv2i32:
452 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
453 ; CHECK-NEXT: vsrl.vi v9, v8, 1
454 ; CHECK-NEXT: lui a0, 349525
455 ; CHECK-NEXT: addi a0, a0, 1365
456 ; CHECK-NEXT: vand.vx v9, v9, a0
457 ; CHECK-NEXT: vsub.vv v8, v8, v9
458 ; CHECK-NEXT: lui a0, 209715
459 ; CHECK-NEXT: addi a0, a0, 819
460 ; CHECK-NEXT: vand.vx v9, v8, a0
461 ; CHECK-NEXT: vsrl.vi v8, v8, 2
462 ; CHECK-NEXT: vand.vx v8, v8, a0
463 ; CHECK-NEXT: vadd.vv v8, v9, v8
464 ; CHECK-NEXT: vsrl.vi v9, v8, 4
465 ; CHECK-NEXT: vadd.vv v8, v8, v9
466 ; CHECK-NEXT: lui a0, 61681
467 ; CHECK-NEXT: addi a0, a0, -241
468 ; CHECK-NEXT: vand.vx v8, v8, a0
469 ; CHECK-NEXT: lui a0, 4112
470 ; CHECK-NEXT: addi a0, a0, 257
471 ; CHECK-NEXT: vmul.vx v8, v8, a0
472 ; CHECK-NEXT: vsrl.vi v8, v8, 24
475 ; CHECK-ZVBB-LABEL: ctpop_nxv2i32:
476 ; CHECK-ZVBB: # %bb.0:
477 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
478 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
479 ; CHECK-ZVBB-NEXT: ret
480 %a = call <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32> %va)
481 ret <vscale x 2 x i32> %a
483 declare <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32>)
485 define <vscale x 4 x i32> @ctpop_nxv4i32(<vscale x 4 x i32> %va) {
486 ; CHECK-LABEL: ctpop_nxv4i32:
488 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
489 ; CHECK-NEXT: vsrl.vi v10, v8, 1
490 ; CHECK-NEXT: lui a0, 349525
491 ; CHECK-NEXT: addi a0, a0, 1365
492 ; CHECK-NEXT: vand.vx v10, v10, a0
493 ; CHECK-NEXT: vsub.vv v8, v8, v10
494 ; CHECK-NEXT: lui a0, 209715
495 ; CHECK-NEXT: addi a0, a0, 819
496 ; CHECK-NEXT: vand.vx v10, v8, a0
497 ; CHECK-NEXT: vsrl.vi v8, v8, 2
498 ; CHECK-NEXT: vand.vx v8, v8, a0
499 ; CHECK-NEXT: vadd.vv v8, v10, v8
500 ; CHECK-NEXT: vsrl.vi v10, v8, 4
501 ; CHECK-NEXT: vadd.vv v8, v8, v10
502 ; CHECK-NEXT: lui a0, 61681
503 ; CHECK-NEXT: addi a0, a0, -241
504 ; CHECK-NEXT: vand.vx v8, v8, a0
505 ; CHECK-NEXT: lui a0, 4112
506 ; CHECK-NEXT: addi a0, a0, 257
507 ; CHECK-NEXT: vmul.vx v8, v8, a0
508 ; CHECK-NEXT: vsrl.vi v8, v8, 24
511 ; CHECK-ZVBB-LABEL: ctpop_nxv4i32:
512 ; CHECK-ZVBB: # %bb.0:
513 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m2, ta, ma
514 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
515 ; CHECK-ZVBB-NEXT: ret
516 %a = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> %va)
517 ret <vscale x 4 x i32> %a
519 declare <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32>)
521 define <vscale x 8 x i32> @ctpop_nxv8i32(<vscale x 8 x i32> %va) {
522 ; CHECK-LABEL: ctpop_nxv8i32:
524 ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
525 ; CHECK-NEXT: vsrl.vi v12, v8, 1
526 ; CHECK-NEXT: lui a0, 349525
527 ; CHECK-NEXT: addi a0, a0, 1365
528 ; CHECK-NEXT: vand.vx v12, v12, a0
529 ; CHECK-NEXT: vsub.vv v8, v8, v12
530 ; CHECK-NEXT: lui a0, 209715
531 ; CHECK-NEXT: addi a0, a0, 819
532 ; CHECK-NEXT: vand.vx v12, v8, a0
533 ; CHECK-NEXT: vsrl.vi v8, v8, 2
534 ; CHECK-NEXT: vand.vx v8, v8, a0
535 ; CHECK-NEXT: vadd.vv v8, v12, v8
536 ; CHECK-NEXT: vsrl.vi v12, v8, 4
537 ; CHECK-NEXT: vadd.vv v8, v8, v12
538 ; CHECK-NEXT: lui a0, 61681
539 ; CHECK-NEXT: addi a0, a0, -241
540 ; CHECK-NEXT: vand.vx v8, v8, a0
541 ; CHECK-NEXT: lui a0, 4112
542 ; CHECK-NEXT: addi a0, a0, 257
543 ; CHECK-NEXT: vmul.vx v8, v8, a0
544 ; CHECK-NEXT: vsrl.vi v8, v8, 24
547 ; CHECK-ZVBB-LABEL: ctpop_nxv8i32:
548 ; CHECK-ZVBB: # %bb.0:
549 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m4, ta, ma
550 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
551 ; CHECK-ZVBB-NEXT: ret
552 %a = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> %va)
553 ret <vscale x 8 x i32> %a
555 declare <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32>)
557 define <vscale x 16 x i32> @ctpop_nxv16i32(<vscale x 16 x i32> %va) {
558 ; CHECK-LABEL: ctpop_nxv16i32:
560 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
561 ; CHECK-NEXT: vsrl.vi v16, v8, 1
562 ; CHECK-NEXT: lui a0, 349525
563 ; CHECK-NEXT: addi a0, a0, 1365
564 ; CHECK-NEXT: vand.vx v16, v16, a0
565 ; CHECK-NEXT: vsub.vv v8, v8, v16
566 ; CHECK-NEXT: lui a0, 209715
567 ; CHECK-NEXT: addi a0, a0, 819
568 ; CHECK-NEXT: vand.vx v16, v8, a0
569 ; CHECK-NEXT: vsrl.vi v8, v8, 2
570 ; CHECK-NEXT: vand.vx v8, v8, a0
571 ; CHECK-NEXT: vadd.vv v8, v16, v8
572 ; CHECK-NEXT: vsrl.vi v16, v8, 4
573 ; CHECK-NEXT: vadd.vv v8, v8, v16
574 ; CHECK-NEXT: lui a0, 61681
575 ; CHECK-NEXT: addi a0, a0, -241
576 ; CHECK-NEXT: vand.vx v8, v8, a0
577 ; CHECK-NEXT: lui a0, 4112
578 ; CHECK-NEXT: addi a0, a0, 257
579 ; CHECK-NEXT: vmul.vx v8, v8, a0
580 ; CHECK-NEXT: vsrl.vi v8, v8, 24
583 ; CHECK-ZVBB-LABEL: ctpop_nxv16i32:
584 ; CHECK-ZVBB: # %bb.0:
585 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma
586 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
587 ; CHECK-ZVBB-NEXT: ret
588 %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va)
589 ret <vscale x 16 x i32> %a
592 ; We always emit vcpop.v for the scalable vector
593 define <vscale x 16 x i1> @ctpop_nxv16i32_ult_two(<vscale x 16 x i32> %va) {
594 ; CHECK-LABEL: ctpop_nxv16i32_ult_two:
596 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
597 ; CHECK-NEXT: vadd.vi v16, v8, -1
598 ; CHECK-NEXT: vand.vv v8, v8, v16
599 ; CHECK-NEXT: vmseq.vi v0, v8, 0
602 ; CHECK-ZVBB-LABEL: ctpop_nxv16i32_ult_two:
603 ; CHECK-ZVBB: # %bb.0:
604 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma
605 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
606 ; CHECK-ZVBB-NEXT: vmsleu.vi v0, v8, 1
607 ; CHECK-ZVBB-NEXT: ret
608 %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va)
609 %cmp = icmp ult <vscale x 16 x i32> %a, splat (i32 2)
610 ret <vscale x 16 x i1> %cmp
613 define <vscale x 16 x i1> @ctpop_nxv16i32_ugt_one(<vscale x 16 x i32> %va) {
614 ; CHECK-LABEL: ctpop_nxv16i32_ugt_one:
616 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
617 ; CHECK-NEXT: vadd.vi v16, v8, -1
618 ; CHECK-NEXT: vand.vv v8, v8, v16
619 ; CHECK-NEXT: vmsne.vi v0, v8, 0
622 ; CHECK-ZVBB-LABEL: ctpop_nxv16i32_ugt_one:
623 ; CHECK-ZVBB: # %bb.0:
624 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma
625 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
626 ; CHECK-ZVBB-NEXT: vmsgtu.vi v0, v8, 1
627 ; CHECK-ZVBB-NEXT: ret
628 %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va)
629 %cmp = icmp ugt <vscale x 16 x i32> %a, splat (i32 1)
630 ret <vscale x 16 x i1> %cmp
633 define <vscale x 16 x i1> @ctpop_nxv16i32_eq_one(<vscale x 16 x i32> %va) {
634 ; CHECK-LABEL: ctpop_nxv16i32_eq_one:
636 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
637 ; CHECK-NEXT: vadd.vi v16, v8, -1
638 ; CHECK-NEXT: vxor.vv v8, v8, v16
639 ; CHECK-NEXT: vmsltu.vv v0, v16, v8
642 ; CHECK-ZVBB-LABEL: ctpop_nxv16i32_eq_one:
643 ; CHECK-ZVBB: # %bb.0:
644 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma
645 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
646 ; CHECK-ZVBB-NEXT: vmseq.vi v0, v8, 1
647 ; CHECK-ZVBB-NEXT: ret
648 %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va)
649 %cmp = icmp eq <vscale x 16 x i32> %a, splat (i32 1)
650 ret <vscale x 16 x i1> %cmp
653 define <vscale x 16 x i1> @ctpop_nxv16i32_ne_one(<vscale x 16 x i32> %va) {
654 ; CHECK-LABEL: ctpop_nxv16i32_ne_one:
656 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
657 ; CHECK-NEXT: vadd.vi v16, v8, -1
658 ; CHECK-NEXT: vxor.vv v8, v8, v16
659 ; CHECK-NEXT: vmsleu.vv v0, v8, v16
662 ; CHECK-ZVBB-LABEL: ctpop_nxv16i32_ne_one:
663 ; CHECK-ZVBB: # %bb.0:
664 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma
665 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
666 ; CHECK-ZVBB-NEXT: vmsne.vi v0, v8, 1
667 ; CHECK-ZVBB-NEXT: ret
668 %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va)
669 %cmp = icmp ne <vscale x 16 x i32> %a, splat (i32 1)
670 ret <vscale x 16 x i1> %cmp
673 declare <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32>)
675 define <vscale x 1 x i64> @ctpop_nxv1i64(<vscale x 1 x i64> %va) {
676 ; RV32-LABEL: ctpop_nxv1i64:
678 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
679 ; RV32-NEXT: vsrl.vi v9, v8, 1
680 ; RV32-NEXT: lui a0, 349525
681 ; RV32-NEXT: addi a0, a0, 1365
682 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
683 ; RV32-NEXT: vmv.v.x v10, a0
684 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
685 ; RV32-NEXT: vand.vv v9, v9, v10
686 ; RV32-NEXT: vsub.vv v8, v8, v9
687 ; RV32-NEXT: lui a0, 209715
688 ; RV32-NEXT: addi a0, a0, 819
689 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
690 ; RV32-NEXT: vmv.v.x v9, a0
691 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
692 ; RV32-NEXT: vand.vv v10, v8, v9
693 ; RV32-NEXT: vsrl.vi v8, v8, 2
694 ; RV32-NEXT: vand.vv v8, v8, v9
695 ; RV32-NEXT: vadd.vv v8, v10, v8
696 ; RV32-NEXT: vsrl.vi v9, v8, 4
697 ; RV32-NEXT: vadd.vv v8, v8, v9
698 ; RV32-NEXT: lui a0, 61681
699 ; RV32-NEXT: addi a0, a0, -241
700 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
701 ; RV32-NEXT: vmv.v.x v9, a0
702 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
703 ; RV32-NEXT: vand.vv v8, v8, v9
704 ; RV32-NEXT: lui a0, 4112
705 ; RV32-NEXT: addi a0, a0, 257
706 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
707 ; RV32-NEXT: vmv.v.x v9, a0
708 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
709 ; RV32-NEXT: vmul.vv v8, v8, v9
710 ; RV32-NEXT: li a0, 56
711 ; RV32-NEXT: vsrl.vx v8, v8, a0
714 ; RV64-LABEL: ctpop_nxv1i64:
716 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
717 ; RV64-NEXT: vsrl.vi v9, v8, 1
718 ; RV64-NEXT: lui a0, 349525
719 ; RV64-NEXT: addiw a0, a0, 1365
720 ; RV64-NEXT: slli a1, a0, 32
721 ; RV64-NEXT: add a0, a0, a1
722 ; RV64-NEXT: vand.vx v9, v9, a0
723 ; RV64-NEXT: vsub.vv v8, v8, v9
724 ; RV64-NEXT: lui a0, 209715
725 ; RV64-NEXT: addiw a0, a0, 819
726 ; RV64-NEXT: slli a1, a0, 32
727 ; RV64-NEXT: add a0, a0, a1
728 ; RV64-NEXT: vand.vx v9, v8, a0
729 ; RV64-NEXT: vsrl.vi v8, v8, 2
730 ; RV64-NEXT: vand.vx v8, v8, a0
731 ; RV64-NEXT: vadd.vv v8, v9, v8
732 ; RV64-NEXT: vsrl.vi v9, v8, 4
733 ; RV64-NEXT: vadd.vv v8, v8, v9
734 ; RV64-NEXT: lui a0, 61681
735 ; RV64-NEXT: addiw a0, a0, -241
736 ; RV64-NEXT: slli a1, a0, 32
737 ; RV64-NEXT: add a0, a0, a1
738 ; RV64-NEXT: vand.vx v8, v8, a0
739 ; RV64-NEXT: lui a0, 4112
740 ; RV64-NEXT: addiw a0, a0, 257
741 ; RV64-NEXT: slli a1, a0, 32
742 ; RV64-NEXT: add a0, a0, a1
743 ; RV64-NEXT: vmul.vx v8, v8, a0
744 ; RV64-NEXT: li a0, 56
745 ; RV64-NEXT: vsrl.vx v8, v8, a0
748 ; CHECK-ZVBB-LABEL: ctpop_nxv1i64:
749 ; CHECK-ZVBB: # %bb.0:
750 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m1, ta, ma
751 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
752 ; CHECK-ZVBB-NEXT: ret
753 %a = call <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64> %va)
754 ret <vscale x 1 x i64> %a
756 declare <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64>)
758 define <vscale x 2 x i64> @ctpop_nxv2i64(<vscale x 2 x i64> %va) {
759 ; RV32-LABEL: ctpop_nxv2i64:
761 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
762 ; RV32-NEXT: vsrl.vi v10, v8, 1
763 ; RV32-NEXT: lui a0, 349525
764 ; RV32-NEXT: addi a0, a0, 1365
765 ; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
766 ; RV32-NEXT: vmv.v.x v12, a0
767 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
768 ; RV32-NEXT: vand.vv v10, v10, v12
769 ; RV32-NEXT: vsub.vv v8, v8, v10
770 ; RV32-NEXT: lui a0, 209715
771 ; RV32-NEXT: addi a0, a0, 819
772 ; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
773 ; RV32-NEXT: vmv.v.x v10, a0
774 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
775 ; RV32-NEXT: vand.vv v12, v8, v10
776 ; RV32-NEXT: vsrl.vi v8, v8, 2
777 ; RV32-NEXT: vand.vv v8, v8, v10
778 ; RV32-NEXT: vadd.vv v8, v12, v8
779 ; RV32-NEXT: vsrl.vi v10, v8, 4
780 ; RV32-NEXT: vadd.vv v8, v8, v10
781 ; RV32-NEXT: lui a0, 61681
782 ; RV32-NEXT: addi a0, a0, -241
783 ; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
784 ; RV32-NEXT: vmv.v.x v10, a0
785 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
786 ; RV32-NEXT: vand.vv v8, v8, v10
787 ; RV32-NEXT: lui a0, 4112
788 ; RV32-NEXT: addi a0, a0, 257
789 ; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
790 ; RV32-NEXT: vmv.v.x v10, a0
791 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
792 ; RV32-NEXT: vmul.vv v8, v8, v10
793 ; RV32-NEXT: li a0, 56
794 ; RV32-NEXT: vsrl.vx v8, v8, a0
797 ; RV64-LABEL: ctpop_nxv2i64:
799 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
800 ; RV64-NEXT: vsrl.vi v10, v8, 1
801 ; RV64-NEXT: lui a0, 349525
802 ; RV64-NEXT: addiw a0, a0, 1365
803 ; RV64-NEXT: slli a1, a0, 32
804 ; RV64-NEXT: add a0, a0, a1
805 ; RV64-NEXT: vand.vx v10, v10, a0
806 ; RV64-NEXT: vsub.vv v8, v8, v10
807 ; RV64-NEXT: lui a0, 209715
808 ; RV64-NEXT: addiw a0, a0, 819
809 ; RV64-NEXT: slli a1, a0, 32
810 ; RV64-NEXT: add a0, a0, a1
811 ; RV64-NEXT: vand.vx v10, v8, a0
812 ; RV64-NEXT: vsrl.vi v8, v8, 2
813 ; RV64-NEXT: vand.vx v8, v8, a0
814 ; RV64-NEXT: vadd.vv v8, v10, v8
815 ; RV64-NEXT: vsrl.vi v10, v8, 4
816 ; RV64-NEXT: vadd.vv v8, v8, v10
817 ; RV64-NEXT: lui a0, 61681
818 ; RV64-NEXT: addiw a0, a0, -241
819 ; RV64-NEXT: slli a1, a0, 32
820 ; RV64-NEXT: add a0, a0, a1
821 ; RV64-NEXT: vand.vx v8, v8, a0
822 ; RV64-NEXT: lui a0, 4112
823 ; RV64-NEXT: addiw a0, a0, 257
824 ; RV64-NEXT: slli a1, a0, 32
825 ; RV64-NEXT: add a0, a0, a1
826 ; RV64-NEXT: vmul.vx v8, v8, a0
827 ; RV64-NEXT: li a0, 56
828 ; RV64-NEXT: vsrl.vx v8, v8, a0
831 ; CHECK-ZVBB-LABEL: ctpop_nxv2i64:
832 ; CHECK-ZVBB: # %bb.0:
833 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m2, ta, ma
834 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
835 ; CHECK-ZVBB-NEXT: ret
836 %a = call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> %va)
837 ret <vscale x 2 x i64> %a
839 declare <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64>)
841 define <vscale x 4 x i64> @ctpop_nxv4i64(<vscale x 4 x i64> %va) {
842 ; RV32-LABEL: ctpop_nxv4i64:
844 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
845 ; RV32-NEXT: vsrl.vi v12, v8, 1
846 ; RV32-NEXT: lui a0, 349525
847 ; RV32-NEXT: addi a0, a0, 1365
848 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
849 ; RV32-NEXT: vmv.v.x v16, a0
850 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
851 ; RV32-NEXT: vand.vv v12, v12, v16
852 ; RV32-NEXT: vsub.vv v8, v8, v12
853 ; RV32-NEXT: lui a0, 209715
854 ; RV32-NEXT: addi a0, a0, 819
855 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
856 ; RV32-NEXT: vmv.v.x v12, a0
857 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
858 ; RV32-NEXT: vand.vv v16, v8, v12
859 ; RV32-NEXT: vsrl.vi v8, v8, 2
860 ; RV32-NEXT: vand.vv v8, v8, v12
861 ; RV32-NEXT: vadd.vv v8, v16, v8
862 ; RV32-NEXT: vsrl.vi v12, v8, 4
863 ; RV32-NEXT: vadd.vv v8, v8, v12
864 ; RV32-NEXT: lui a0, 61681
865 ; RV32-NEXT: addi a0, a0, -241
866 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
867 ; RV32-NEXT: vmv.v.x v12, a0
868 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
869 ; RV32-NEXT: vand.vv v8, v8, v12
870 ; RV32-NEXT: lui a0, 4112
871 ; RV32-NEXT: addi a0, a0, 257
872 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
873 ; RV32-NEXT: vmv.v.x v12, a0
874 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
875 ; RV32-NEXT: vmul.vv v8, v8, v12
876 ; RV32-NEXT: li a0, 56
877 ; RV32-NEXT: vsrl.vx v8, v8, a0
880 ; RV64-LABEL: ctpop_nxv4i64:
882 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
883 ; RV64-NEXT: vsrl.vi v12, v8, 1
884 ; RV64-NEXT: lui a0, 349525
885 ; RV64-NEXT: addiw a0, a0, 1365
886 ; RV64-NEXT: slli a1, a0, 32
887 ; RV64-NEXT: add a0, a0, a1
888 ; RV64-NEXT: vand.vx v12, v12, a0
889 ; RV64-NEXT: vsub.vv v8, v8, v12
890 ; RV64-NEXT: lui a0, 209715
891 ; RV64-NEXT: addiw a0, a0, 819
892 ; RV64-NEXT: slli a1, a0, 32
893 ; RV64-NEXT: add a0, a0, a1
894 ; RV64-NEXT: vand.vx v12, v8, a0
895 ; RV64-NEXT: vsrl.vi v8, v8, 2
896 ; RV64-NEXT: vand.vx v8, v8, a0
897 ; RV64-NEXT: vadd.vv v8, v12, v8
898 ; RV64-NEXT: vsrl.vi v12, v8, 4
899 ; RV64-NEXT: vadd.vv v8, v8, v12
900 ; RV64-NEXT: lui a0, 61681
901 ; RV64-NEXT: addiw a0, a0, -241
902 ; RV64-NEXT: slli a1, a0, 32
903 ; RV64-NEXT: add a0, a0, a1
904 ; RV64-NEXT: vand.vx v8, v8, a0
905 ; RV64-NEXT: lui a0, 4112
906 ; RV64-NEXT: addiw a0, a0, 257
907 ; RV64-NEXT: slli a1, a0, 32
908 ; RV64-NEXT: add a0, a0, a1
909 ; RV64-NEXT: vmul.vx v8, v8, a0
910 ; RV64-NEXT: li a0, 56
911 ; RV64-NEXT: vsrl.vx v8, v8, a0
914 ; CHECK-ZVBB-LABEL: ctpop_nxv4i64:
915 ; CHECK-ZVBB: # %bb.0:
916 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m4, ta, ma
917 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
918 ; CHECK-ZVBB-NEXT: ret
919 %a = call <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64> %va)
920 ret <vscale x 4 x i64> %a
922 declare <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64>)
; ctpop on <vscale x 8 x i64> (e64, LMUL=8). Without Zvbb this lowers to the
; classic bit-twiddling popcount (shift/mask by 0x5555..., 0x3333..., 0x0f0f...,
; multiply by 0x0101... then shift right by 56). RV32 cannot hold a 64-bit
; constant in a scalar register, so it splats each 32-bit half-pattern at e32
; (vsetvli e32 + vmv.v.x) and reuses that vector at e64; RV64 materializes the
; full 64-bit masks with addiw/slli/add and uses the .vx instruction forms.
; With +zvbb the whole sequence collapses to a single vcpop.v.
; NOTE(review): assertions are autogenerated — regenerate with
; update_llc_test_checks.py rather than editing CHECK lines by hand.
924 define <vscale x 8 x i64> @ctpop_nxv8i64(<vscale x 8 x i64> %va) {
925 ; RV32-LABEL: ctpop_nxv8i64:
927 ; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
928 ; RV32-NEXT: vsrl.vi v16, v8, 1
929 ; RV32-NEXT: lui a0, 349525
930 ; RV32-NEXT: addi a0, a0, 1365
931 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
932 ; RV32-NEXT: vmv.v.x v24, a0
933 ; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
934 ; RV32-NEXT: vand.vv v16, v16, v24
935 ; RV32-NEXT: vsub.vv v8, v8, v16
936 ; RV32-NEXT: lui a0, 209715
937 ; RV32-NEXT: addi a0, a0, 819
938 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
939 ; RV32-NEXT: vmv.v.x v16, a0
940 ; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
941 ; RV32-NEXT: vand.vv v24, v8, v16
942 ; RV32-NEXT: vsrl.vi v8, v8, 2
943 ; RV32-NEXT: vand.vv v8, v8, v16
944 ; RV32-NEXT: vadd.vv v8, v24, v8
945 ; RV32-NEXT: vsrl.vi v16, v8, 4
946 ; RV32-NEXT: vadd.vv v8, v8, v16
947 ; RV32-NEXT: lui a0, 61681
948 ; RV32-NEXT: addi a0, a0, -241
949 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
950 ; RV32-NEXT: vmv.v.x v16, a0
951 ; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
952 ; RV32-NEXT: vand.vv v8, v8, v16
953 ; RV32-NEXT: lui a0, 4112
954 ; RV32-NEXT: addi a0, a0, 257
955 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
956 ; RV32-NEXT: vmv.v.x v16, a0
957 ; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
958 ; RV32-NEXT: vmul.vv v8, v8, v16
959 ; RV32-NEXT: li a0, 56
960 ; RV32-NEXT: vsrl.vx v8, v8, a0
963 ; RV64-LABEL: ctpop_nxv8i64:
965 ; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma
966 ; RV64-NEXT: vsrl.vi v16, v8, 1
967 ; RV64-NEXT: lui a0, 349525
968 ; RV64-NEXT: addiw a0, a0, 1365
969 ; RV64-NEXT: slli a1, a0, 32
970 ; RV64-NEXT: add a0, a0, a1
971 ; RV64-NEXT: vand.vx v16, v16, a0
972 ; RV64-NEXT: vsub.vv v8, v8, v16
973 ; RV64-NEXT: lui a0, 209715
974 ; RV64-NEXT: addiw a0, a0, 819
975 ; RV64-NEXT: slli a1, a0, 32
976 ; RV64-NEXT: add a0, a0, a1
977 ; RV64-NEXT: vand.vx v16, v8, a0
978 ; RV64-NEXT: vsrl.vi v8, v8, 2
979 ; RV64-NEXT: vand.vx v8, v8, a0
980 ; RV64-NEXT: vadd.vv v8, v16, v8
981 ; RV64-NEXT: vsrl.vi v16, v8, 4
982 ; RV64-NEXT: vadd.vv v8, v8, v16
983 ; RV64-NEXT: lui a0, 61681
984 ; RV64-NEXT: addiw a0, a0, -241
985 ; RV64-NEXT: slli a1, a0, 32
986 ; RV64-NEXT: add a0, a0, a1
987 ; RV64-NEXT: vand.vx v8, v8, a0
988 ; RV64-NEXT: lui a0, 4112
989 ; RV64-NEXT: addiw a0, a0, 257
990 ; RV64-NEXT: slli a1, a0, 32
991 ; RV64-NEXT: add a0, a0, a1
992 ; RV64-NEXT: vmul.vx v8, v8, a0
993 ; RV64-NEXT: li a0, 56
994 ; RV64-NEXT: vsrl.vx v8, v8, a0
997 ; CHECK-ZVBB-LABEL: ctpop_nxv8i64:
998 ; CHECK-ZVBB: # %bb.0:
999 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m8, ta, ma
1000 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
1001 ; CHECK-ZVBB-NEXT: ret
1002 %a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
1003 ret <vscale x 8 x i64> %a
1006 ; We always emit vcpop.v for the scalable vector
; ctpop(x) u< 2 ("at most one bit set"): without Zvbb this folds to
; (x & (x-1)) == 0, avoiding the full popcount sequence entirely; with Zvbb
; it is vcpop.v followed by an unsigned <= 1 compare.
1007 define <vscale x 8 x i1> @ctpop_nxv8i64_ult_two(<vscale x 8 x i64> %va) {
1008 ; CHECK-LABEL: ctpop_nxv8i64_ult_two:
1010 ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
1011 ; CHECK-NEXT: vadd.vi v16, v8, -1
1012 ; CHECK-NEXT: vand.vv v8, v8, v16
1013 ; CHECK-NEXT: vmseq.vi v0, v8, 0
1016 ; CHECK-ZVBB-LABEL: ctpop_nxv8i64_ult_two:
1017 ; CHECK-ZVBB: # %bb.0:
1018 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m8, ta, ma
1019 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
1020 ; CHECK-ZVBB-NEXT: vmsleu.vi v0, v8, 1
1021 ; CHECK-ZVBB-NEXT: ret
1022 %a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
1023 %cmp = icmp ult <vscale x 8 x i64> %a, splat (i64 2)
1024 ret <vscale x 8 x i1> %cmp
; ctpop(x) u> 1 ("more than one bit set"): without Zvbb this folds to
; (x & (x-1)) != 0 — the complement of the ult_two case above; with Zvbb it
; is vcpop.v followed by an unsigned > 1 compare.
1027 define <vscale x 8 x i1> @ctpop_nxv8i64_ugt_one(<vscale x 8 x i64> %va) {
1028 ; CHECK-LABEL: ctpop_nxv8i64_ugt_one:
1030 ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
1031 ; CHECK-NEXT: vadd.vi v16, v8, -1
1032 ; CHECK-NEXT: vand.vv v8, v8, v16
1033 ; CHECK-NEXT: vmsne.vi v0, v8, 0
1036 ; CHECK-ZVBB-LABEL: ctpop_nxv8i64_ugt_one:
1037 ; CHECK-ZVBB: # %bb.0:
1038 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m8, ta, ma
1039 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
1040 ; CHECK-ZVBB-NEXT: vmsgtu.vi v0, v8, 1
1041 ; CHECK-ZVBB-NEXT: ret
1042 %a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
1043 %cmp = icmp ugt <vscale x 8 x i64> %a, splat (i64 1)
1044 ret <vscale x 8 x i1> %cmp
; ctpop(x) == 1 (exact power-of-two test): without Zvbb this folds to
; (x-1) u< (x ^ (x-1)) — v16 holds x-1, v8 holds x^(x-1), and vmsltu.vv
; compares v16 u< v8; with Zvbb it is vcpop.v followed by an == 1 compare.
1047 define <vscale x 8 x i1> @ctpop_nxv8i64_eq_one(<vscale x 8 x i64> %va) {
1048 ; CHECK-LABEL: ctpop_nxv8i64_eq_one:
1050 ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
1051 ; CHECK-NEXT: vadd.vi v16, v8, -1
1052 ; CHECK-NEXT: vxor.vv v8, v8, v16
1053 ; CHECK-NEXT: vmsltu.vv v0, v16, v8
1056 ; CHECK-ZVBB-LABEL: ctpop_nxv8i64_eq_one:
1057 ; CHECK-ZVBB: # %bb.0:
1058 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m8, ta, ma
1059 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
1060 ; CHECK-ZVBB-NEXT: vmseq.vi v0, v8, 1
1061 ; CHECK-ZVBB-NEXT: ret
1062 %a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
1063 %cmp = icmp eq <vscale x 8 x i64> %a, splat (i64 1)
1064 ret <vscale x 8 x i1> %cmp
; ctpop(x) != 1 (not a power of two): without Zvbb this folds to
; (x ^ (x-1)) u<= (x-1) — the inverse of the eq_one case, using vmsleu.vv
; with the operands swapped; with Zvbb it is vcpop.v followed by != 1.
1067 define <vscale x 8 x i1> @ctpop_nxv8i64_ne_one(<vscale x 8 x i64> %va) {
1068 ; CHECK-LABEL: ctpop_nxv8i64_ne_one:
1070 ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
1071 ; CHECK-NEXT: vadd.vi v16, v8, -1
1072 ; CHECK-NEXT: vxor.vv v8, v8, v16
1073 ; CHECK-NEXT: vmsleu.vv v0, v8, v16
1076 ; CHECK-ZVBB-LABEL: ctpop_nxv8i64_ne_one:
1077 ; CHECK-ZVBB: # %bb.0:
1078 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m8, ta, ma
1079 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
1080 ; CHECK-ZVBB-NEXT: vmsne.vi v0, v8, 1
1081 ; CHECK-ZVBB-NEXT: ret
1082 %a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
1083 %cmp = icmp ne <vscale x 8 x i64> %a, splat (i64 1)
1084 ret <vscale x 8 x i1> %cmp
1087 declare <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64>)