1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
4 ; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
5 ; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
; ctpop on nxv1i8 (mf8): base V lowers to the classic bit-twiddling popcount
; using byte masks 85 (0x55), 51 (0x33) and 15 (0x0F); Zvbb folds the whole
; expansion into a single vcpop.v.
7 define <vscale x 1 x i8> @ctpop_nxv1i8(<vscale x 1 x i8> %va) {
8 ; CHECK-LABEL: ctpop_nxv1i8:
10 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
11 ; CHECK-NEXT: vsrl.vi v9, v8, 1
12 ; CHECK-NEXT: li a0, 85
13 ; CHECK-NEXT: vand.vx v9, v9, a0
14 ; CHECK-NEXT: vsub.vv v8, v8, v9
15 ; CHECK-NEXT: li a0, 51
16 ; CHECK-NEXT: vand.vx v9, v8, a0
17 ; CHECK-NEXT: vsrl.vi v8, v8, 2
18 ; CHECK-NEXT: vand.vx v8, v8, a0
19 ; CHECK-NEXT: vadd.vv v8, v9, v8
20 ; CHECK-NEXT: vsrl.vi v9, v8, 4
21 ; CHECK-NEXT: vadd.vv v8, v8, v9
22 ; CHECK-NEXT: vand.vi v8, v8, 15
25 ; CHECK-ZVBB-LABEL: ctpop_nxv1i8:
26 ; CHECK-ZVBB: # %bb.0:
27 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
28 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
29 ; CHECK-ZVBB-NEXT: ret
30 %a = call <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8> %va)
31 ret <vscale x 1 x i8> %a
33 declare <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8>)
; ctpop on nxv2i8 (mf4): same bit-twiddling popcount as nxv1i8, only the
; vsetvli LMUL changes; Zvbb emits a single vcpop.v.
35 define <vscale x 2 x i8> @ctpop_nxv2i8(<vscale x 2 x i8> %va) {
36 ; CHECK-LABEL: ctpop_nxv2i8:
38 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
39 ; CHECK-NEXT: vsrl.vi v9, v8, 1
40 ; CHECK-NEXT: li a0, 85
41 ; CHECK-NEXT: vand.vx v9, v9, a0
42 ; CHECK-NEXT: vsub.vv v8, v8, v9
43 ; CHECK-NEXT: li a0, 51
44 ; CHECK-NEXT: vand.vx v9, v8, a0
45 ; CHECK-NEXT: vsrl.vi v8, v8, 2
46 ; CHECK-NEXT: vand.vx v8, v8, a0
47 ; CHECK-NEXT: vadd.vv v8, v9, v8
48 ; CHECK-NEXT: vsrl.vi v9, v8, 4
49 ; CHECK-NEXT: vadd.vv v8, v8, v9
50 ; CHECK-NEXT: vand.vi v8, v8, 15
53 ; CHECK-ZVBB-LABEL: ctpop_nxv2i8:
54 ; CHECK-ZVBB: # %bb.0:
55 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
56 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
57 ; CHECK-ZVBB-NEXT: ret
58 %a = call <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8> %va)
59 ret <vscale x 2 x i8> %a
61 declare <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8>)
; ctpop on nxv4i8 (mf2): same byte popcount expansion; Zvbb emits vcpop.v.
63 define <vscale x 4 x i8> @ctpop_nxv4i8(<vscale x 4 x i8> %va) {
64 ; CHECK-LABEL: ctpop_nxv4i8:
66 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
67 ; CHECK-NEXT: vsrl.vi v9, v8, 1
68 ; CHECK-NEXT: li a0, 85
69 ; CHECK-NEXT: vand.vx v9, v9, a0
70 ; CHECK-NEXT: vsub.vv v8, v8, v9
71 ; CHECK-NEXT: li a0, 51
72 ; CHECK-NEXT: vand.vx v9, v8, a0
73 ; CHECK-NEXT: vsrl.vi v8, v8, 2
74 ; CHECK-NEXT: vand.vx v8, v8, a0
75 ; CHECK-NEXT: vadd.vv v8, v9, v8
76 ; CHECK-NEXT: vsrl.vi v9, v8, 4
77 ; CHECK-NEXT: vadd.vv v8, v8, v9
78 ; CHECK-NEXT: vand.vi v8, v8, 15
81 ; CHECK-ZVBB-LABEL: ctpop_nxv4i8:
82 ; CHECK-ZVBB: # %bb.0:
83 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
84 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
85 ; CHECK-ZVBB-NEXT: ret
86 %a = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> %va)
87 ret <vscale x 4 x i8> %a
89 declare <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8>)
; ctpop on nxv8i8 (m1): same byte popcount expansion; Zvbb emits vcpop.v.
91 define <vscale x 8 x i8> @ctpop_nxv8i8(<vscale x 8 x i8> %va) {
92 ; CHECK-LABEL: ctpop_nxv8i8:
94 ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
95 ; CHECK-NEXT: vsrl.vi v9, v8, 1
96 ; CHECK-NEXT: li a0, 85
97 ; CHECK-NEXT: vand.vx v9, v9, a0
98 ; CHECK-NEXT: vsub.vv v8, v8, v9
99 ; CHECK-NEXT: li a0, 51
100 ; CHECK-NEXT: vand.vx v9, v8, a0
101 ; CHECK-NEXT: vsrl.vi v8, v8, 2
102 ; CHECK-NEXT: vand.vx v8, v8, a0
103 ; CHECK-NEXT: vadd.vv v8, v9, v8
104 ; CHECK-NEXT: vsrl.vi v9, v8, 4
105 ; CHECK-NEXT: vadd.vv v8, v8, v9
106 ; CHECK-NEXT: vand.vi v8, v8, 15
109 ; CHECK-ZVBB-LABEL: ctpop_nxv8i8:
110 ; CHECK-ZVBB: # %bb.0:
111 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma
112 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
113 ; CHECK-ZVBB-NEXT: ret
114 %a = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> %va)
115 ret <vscale x 8 x i8> %a
117 declare <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8>)
; ctpop on nxv16i8 (m2): scratch register becomes v10 at LMUL=2; otherwise
; the same byte popcount expansion; Zvbb emits vcpop.v.
119 define <vscale x 16 x i8> @ctpop_nxv16i8(<vscale x 16 x i8> %va) {
120 ; CHECK-LABEL: ctpop_nxv16i8:
122 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
123 ; CHECK-NEXT: vsrl.vi v10, v8, 1
124 ; CHECK-NEXT: li a0, 85
125 ; CHECK-NEXT: vand.vx v10, v10, a0
126 ; CHECK-NEXT: vsub.vv v8, v8, v10
127 ; CHECK-NEXT: li a0, 51
128 ; CHECK-NEXT: vand.vx v10, v8, a0
129 ; CHECK-NEXT: vsrl.vi v8, v8, 2
130 ; CHECK-NEXT: vand.vx v8, v8, a0
131 ; CHECK-NEXT: vadd.vv v8, v10, v8
132 ; CHECK-NEXT: vsrl.vi v10, v8, 4
133 ; CHECK-NEXT: vadd.vv v8, v8, v10
134 ; CHECK-NEXT: vand.vi v8, v8, 15
137 ; CHECK-ZVBB-LABEL: ctpop_nxv16i8:
138 ; CHECK-ZVBB: # %bb.0:
139 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma
140 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
141 ; CHECK-ZVBB-NEXT: ret
142 %a = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> %va)
143 ret <vscale x 16 x i8> %a
145 declare <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8>)
; ctpop on nxv32i8 (m4): scratch register v12 at LMUL=4; same expansion;
; Zvbb emits vcpop.v.
147 define <vscale x 32 x i8> @ctpop_nxv32i8(<vscale x 32 x i8> %va) {
148 ; CHECK-LABEL: ctpop_nxv32i8:
150 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
151 ; CHECK-NEXT: vsrl.vi v12, v8, 1
152 ; CHECK-NEXT: li a0, 85
153 ; CHECK-NEXT: vand.vx v12, v12, a0
154 ; CHECK-NEXT: vsub.vv v8, v8, v12
155 ; CHECK-NEXT: li a0, 51
156 ; CHECK-NEXT: vand.vx v12, v8, a0
157 ; CHECK-NEXT: vsrl.vi v8, v8, 2
158 ; CHECK-NEXT: vand.vx v8, v8, a0
159 ; CHECK-NEXT: vadd.vv v8, v12, v8
160 ; CHECK-NEXT: vsrl.vi v12, v8, 4
161 ; CHECK-NEXT: vadd.vv v8, v8, v12
162 ; CHECK-NEXT: vand.vi v8, v8, 15
165 ; CHECK-ZVBB-LABEL: ctpop_nxv32i8:
166 ; CHECK-ZVBB: # %bb.0:
167 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m4, ta, ma
168 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
169 ; CHECK-ZVBB-NEXT: ret
170 %a = call <vscale x 32 x i8> @llvm.ctpop.nxv32i8(<vscale x 32 x i8> %va)
171 ret <vscale x 32 x i8> %a
173 declare <vscale x 32 x i8> @llvm.ctpop.nxv32i8(<vscale x 32 x i8>)
; ctpop on nxv64i8 (m8): scratch register v16 at LMUL=8; same expansion;
; Zvbb emits vcpop.v.
175 define <vscale x 64 x i8> @ctpop_nxv64i8(<vscale x 64 x i8> %va) {
176 ; CHECK-LABEL: ctpop_nxv64i8:
178 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
179 ; CHECK-NEXT: vsrl.vi v16, v8, 1
180 ; CHECK-NEXT: li a0, 85
181 ; CHECK-NEXT: vand.vx v16, v16, a0
182 ; CHECK-NEXT: vsub.vv v8, v8, v16
183 ; CHECK-NEXT: li a0, 51
184 ; CHECK-NEXT: vand.vx v16, v8, a0
185 ; CHECK-NEXT: vsrl.vi v8, v8, 2
186 ; CHECK-NEXT: vand.vx v8, v8, a0
187 ; CHECK-NEXT: vadd.vv v8, v16, v8
188 ; CHECK-NEXT: vsrl.vi v16, v8, 4
189 ; CHECK-NEXT: vadd.vv v8, v8, v16
190 ; CHECK-NEXT: vand.vi v8, v8, 15
193 ; CHECK-ZVBB-LABEL: ctpop_nxv64i8:
194 ; CHECK-ZVBB: # %bb.0:
195 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m8, ta, ma
196 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
197 ; CHECK-ZVBB-NEXT: ret
198 %a = call <vscale x 64 x i8> @llvm.ctpop.nxv64i8(<vscale x 64 x i8> %va)
199 ret <vscale x 64 x i8> %a
201 declare <vscale x 64 x i8> @llvm.ctpop.nxv64i8(<vscale x 64 x i8>)
; ctpop on nxv1i16 (mf4): 16-bit variant — masks 0x5555/0x3333/0x0F0F built
; with lui+addi, then the per-element sum is gathered by multiplying with
; 257 (0x0101) and shifting right by 8; Zvbb emits a single vcpop.v.
203 define <vscale x 1 x i16> @ctpop_nxv1i16(<vscale x 1 x i16> %va) {
204 ; CHECK-LABEL: ctpop_nxv1i16:
206 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
207 ; CHECK-NEXT: vsrl.vi v9, v8, 1
208 ; CHECK-NEXT: lui a0, 5
209 ; CHECK-NEXT: addi a0, a0, 1365
210 ; CHECK-NEXT: vand.vx v9, v9, a0
211 ; CHECK-NEXT: vsub.vv v8, v8, v9
212 ; CHECK-NEXT: lui a0, 3
213 ; CHECK-NEXT: addi a0, a0, 819
214 ; CHECK-NEXT: vand.vx v9, v8, a0
215 ; CHECK-NEXT: vsrl.vi v8, v8, 2
216 ; CHECK-NEXT: vand.vx v8, v8, a0
217 ; CHECK-NEXT: vadd.vv v8, v9, v8
218 ; CHECK-NEXT: vsrl.vi v9, v8, 4
219 ; CHECK-NEXT: vadd.vv v8, v8, v9
220 ; CHECK-NEXT: lui a0, 1
221 ; CHECK-NEXT: addi a0, a0, -241
222 ; CHECK-NEXT: vand.vx v8, v8, a0
223 ; CHECK-NEXT: li a0, 257
224 ; CHECK-NEXT: vmul.vx v8, v8, a0
225 ; CHECK-NEXT: vsrl.vi v8, v8, 8
228 ; CHECK-ZVBB-LABEL: ctpop_nxv1i16:
229 ; CHECK-ZVBB: # %bb.0:
230 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
231 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
232 ; CHECK-ZVBB-NEXT: ret
233 %a = call <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16> %va)
234 ret <vscale x 1 x i16> %a
236 declare <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16>)
; ctpop on nxv2i16 (mf2): same 16-bit popcount expansion; Zvbb emits vcpop.v.
238 define <vscale x 2 x i16> @ctpop_nxv2i16(<vscale x 2 x i16> %va) {
239 ; CHECK-LABEL: ctpop_nxv2i16:
241 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
242 ; CHECK-NEXT: vsrl.vi v9, v8, 1
243 ; CHECK-NEXT: lui a0, 5
244 ; CHECK-NEXT: addi a0, a0, 1365
245 ; CHECK-NEXT: vand.vx v9, v9, a0
246 ; CHECK-NEXT: vsub.vv v8, v8, v9
247 ; CHECK-NEXT: lui a0, 3
248 ; CHECK-NEXT: addi a0, a0, 819
249 ; CHECK-NEXT: vand.vx v9, v8, a0
250 ; CHECK-NEXT: vsrl.vi v8, v8, 2
251 ; CHECK-NEXT: vand.vx v8, v8, a0
252 ; CHECK-NEXT: vadd.vv v8, v9, v8
253 ; CHECK-NEXT: vsrl.vi v9, v8, 4
254 ; CHECK-NEXT: vadd.vv v8, v8, v9
255 ; CHECK-NEXT: lui a0, 1
256 ; CHECK-NEXT: addi a0, a0, -241
257 ; CHECK-NEXT: vand.vx v8, v8, a0
258 ; CHECK-NEXT: li a0, 257
259 ; CHECK-NEXT: vmul.vx v8, v8, a0
260 ; CHECK-NEXT: vsrl.vi v8, v8, 8
263 ; CHECK-ZVBB-LABEL: ctpop_nxv2i16:
264 ; CHECK-ZVBB: # %bb.0:
265 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
266 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
267 ; CHECK-ZVBB-NEXT: ret
268 %a = call <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16> %va)
269 ret <vscale x 2 x i16> %a
271 declare <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16>)
; ctpop on nxv4i16 (m1): same 16-bit popcount expansion; Zvbb emits vcpop.v.
273 define <vscale x 4 x i16> @ctpop_nxv4i16(<vscale x 4 x i16> %va) {
274 ; CHECK-LABEL: ctpop_nxv4i16:
276 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
277 ; CHECK-NEXT: vsrl.vi v9, v8, 1
278 ; CHECK-NEXT: lui a0, 5
279 ; CHECK-NEXT: addi a0, a0, 1365
280 ; CHECK-NEXT: vand.vx v9, v9, a0
281 ; CHECK-NEXT: vsub.vv v8, v8, v9
282 ; CHECK-NEXT: lui a0, 3
283 ; CHECK-NEXT: addi a0, a0, 819
284 ; CHECK-NEXT: vand.vx v9, v8, a0
285 ; CHECK-NEXT: vsrl.vi v8, v8, 2
286 ; CHECK-NEXT: vand.vx v8, v8, a0
287 ; CHECK-NEXT: vadd.vv v8, v9, v8
288 ; CHECK-NEXT: vsrl.vi v9, v8, 4
289 ; CHECK-NEXT: vadd.vv v8, v8, v9
290 ; CHECK-NEXT: lui a0, 1
291 ; CHECK-NEXT: addi a0, a0, -241
292 ; CHECK-NEXT: vand.vx v8, v8, a0
293 ; CHECK-NEXT: li a0, 257
294 ; CHECK-NEXT: vmul.vx v8, v8, a0
295 ; CHECK-NEXT: vsrl.vi v8, v8, 8
298 ; CHECK-ZVBB-LABEL: ctpop_nxv4i16:
299 ; CHECK-ZVBB: # %bb.0:
300 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
301 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
302 ; CHECK-ZVBB-NEXT: ret
303 %a = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> %va)
304 ret <vscale x 4 x i16> %a
306 declare <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16>)
; ctpop on nxv8i16 (m2): scratch register v10; same 16-bit expansion;
; Zvbb emits vcpop.v.
308 define <vscale x 8 x i16> @ctpop_nxv8i16(<vscale x 8 x i16> %va) {
309 ; CHECK-LABEL: ctpop_nxv8i16:
311 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
312 ; CHECK-NEXT: vsrl.vi v10, v8, 1
313 ; CHECK-NEXT: lui a0, 5
314 ; CHECK-NEXT: addi a0, a0, 1365
315 ; CHECK-NEXT: vand.vx v10, v10, a0
316 ; CHECK-NEXT: vsub.vv v8, v8, v10
317 ; CHECK-NEXT: lui a0, 3
318 ; CHECK-NEXT: addi a0, a0, 819
319 ; CHECK-NEXT: vand.vx v10, v8, a0
320 ; CHECK-NEXT: vsrl.vi v8, v8, 2
321 ; CHECK-NEXT: vand.vx v8, v8, a0
322 ; CHECK-NEXT: vadd.vv v8, v10, v8
323 ; CHECK-NEXT: vsrl.vi v10, v8, 4
324 ; CHECK-NEXT: vadd.vv v8, v8, v10
325 ; CHECK-NEXT: lui a0, 1
326 ; CHECK-NEXT: addi a0, a0, -241
327 ; CHECK-NEXT: vand.vx v8, v8, a0
328 ; CHECK-NEXT: li a0, 257
329 ; CHECK-NEXT: vmul.vx v8, v8, a0
330 ; CHECK-NEXT: vsrl.vi v8, v8, 8
333 ; CHECK-ZVBB-LABEL: ctpop_nxv8i16:
334 ; CHECK-ZVBB: # %bb.0:
335 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m2, ta, ma
336 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
337 ; CHECK-ZVBB-NEXT: ret
338 %a = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> %va)
339 ret <vscale x 8 x i16> %a
341 declare <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16>)
; ctpop on nxv16i16 (m4): scratch register v12; same 16-bit expansion;
; Zvbb emits vcpop.v.
343 define <vscale x 16 x i16> @ctpop_nxv16i16(<vscale x 16 x i16> %va) {
344 ; CHECK-LABEL: ctpop_nxv16i16:
346 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
347 ; CHECK-NEXT: vsrl.vi v12, v8, 1
348 ; CHECK-NEXT: lui a0, 5
349 ; CHECK-NEXT: addi a0, a0, 1365
350 ; CHECK-NEXT: vand.vx v12, v12, a0
351 ; CHECK-NEXT: vsub.vv v8, v8, v12
352 ; CHECK-NEXT: lui a0, 3
353 ; CHECK-NEXT: addi a0, a0, 819
354 ; CHECK-NEXT: vand.vx v12, v8, a0
355 ; CHECK-NEXT: vsrl.vi v8, v8, 2
356 ; CHECK-NEXT: vand.vx v8, v8, a0
357 ; CHECK-NEXT: vadd.vv v8, v12, v8
358 ; CHECK-NEXT: vsrl.vi v12, v8, 4
359 ; CHECK-NEXT: vadd.vv v8, v8, v12
360 ; CHECK-NEXT: lui a0, 1
361 ; CHECK-NEXT: addi a0, a0, -241
362 ; CHECK-NEXT: vand.vx v8, v8, a0
363 ; CHECK-NEXT: li a0, 257
364 ; CHECK-NEXT: vmul.vx v8, v8, a0
365 ; CHECK-NEXT: vsrl.vi v8, v8, 8
368 ; CHECK-ZVBB-LABEL: ctpop_nxv16i16:
369 ; CHECK-ZVBB: # %bb.0:
370 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m4, ta, ma
371 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
372 ; CHECK-ZVBB-NEXT: ret
373 %a = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> %va)
374 ret <vscale x 16 x i16> %a
376 declare <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16>)
; ctpop on nxv32i16 (m8): scratch register v16; same 16-bit expansion;
; Zvbb emits vcpop.v.
378 define <vscale x 32 x i16> @ctpop_nxv32i16(<vscale x 32 x i16> %va) {
379 ; CHECK-LABEL: ctpop_nxv32i16:
381 ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
382 ; CHECK-NEXT: vsrl.vi v16, v8, 1
383 ; CHECK-NEXT: lui a0, 5
384 ; CHECK-NEXT: addi a0, a0, 1365
385 ; CHECK-NEXT: vand.vx v16, v16, a0
386 ; CHECK-NEXT: vsub.vv v8, v8, v16
387 ; CHECK-NEXT: lui a0, 3
388 ; CHECK-NEXT: addi a0, a0, 819
389 ; CHECK-NEXT: vand.vx v16, v8, a0
390 ; CHECK-NEXT: vsrl.vi v8, v8, 2
391 ; CHECK-NEXT: vand.vx v8, v8, a0
392 ; CHECK-NEXT: vadd.vv v8, v16, v8
393 ; CHECK-NEXT: vsrl.vi v16, v8, 4
394 ; CHECK-NEXT: vadd.vv v8, v8, v16
395 ; CHECK-NEXT: lui a0, 1
396 ; CHECK-NEXT: addi a0, a0, -241
397 ; CHECK-NEXT: vand.vx v8, v8, a0
398 ; CHECK-NEXT: li a0, 257
399 ; CHECK-NEXT: vmul.vx v8, v8, a0
400 ; CHECK-NEXT: vsrl.vi v8, v8, 8
403 ; CHECK-ZVBB-LABEL: ctpop_nxv32i16:
404 ; CHECK-ZVBB: # %bb.0:
405 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m8, ta, ma
406 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
407 ; CHECK-ZVBB-NEXT: ret
408 %a = call <vscale x 32 x i16> @llvm.ctpop.nxv32i16(<vscale x 32 x i16> %va)
409 ret <vscale x 32 x i16> %a
411 declare <vscale x 32 x i16> @llvm.ctpop.nxv32i16(<vscale x 32 x i16>)
; ctpop on nxv1i32 (mf2): 32-bit variant — masks 0x55555555/0x33333333/
; 0x0F0F0F0F, then multiply by 0x01010101 and shift right 24 to sum the
; byte counts; Zvbb emits a single vcpop.v.
413 define <vscale x 1 x i32> @ctpop_nxv1i32(<vscale x 1 x i32> %va) {
414 ; CHECK-LABEL: ctpop_nxv1i32:
416 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
417 ; CHECK-NEXT: vsrl.vi v9, v8, 1
418 ; CHECK-NEXT: lui a0, 349525
419 ; CHECK-NEXT: addi a0, a0, 1365
420 ; CHECK-NEXT: vand.vx v9, v9, a0
421 ; CHECK-NEXT: vsub.vv v8, v8, v9
422 ; CHECK-NEXT: lui a0, 209715
423 ; CHECK-NEXT: addi a0, a0, 819
424 ; CHECK-NEXT: vand.vx v9, v8, a0
425 ; CHECK-NEXT: vsrl.vi v8, v8, 2
426 ; CHECK-NEXT: vand.vx v8, v8, a0
427 ; CHECK-NEXT: vadd.vv v8, v9, v8
428 ; CHECK-NEXT: vsrl.vi v9, v8, 4
429 ; CHECK-NEXT: vadd.vv v8, v8, v9
430 ; CHECK-NEXT: lui a0, 61681
431 ; CHECK-NEXT: addi a0, a0, -241
432 ; CHECK-NEXT: vand.vx v8, v8, a0
433 ; CHECK-NEXT: lui a0, 4112
434 ; CHECK-NEXT: addi a0, a0, 257
435 ; CHECK-NEXT: vmul.vx v8, v8, a0
436 ; CHECK-NEXT: vsrl.vi v8, v8, 24
439 ; CHECK-ZVBB-LABEL: ctpop_nxv1i32:
440 ; CHECK-ZVBB: # %bb.0:
441 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
442 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
443 ; CHECK-ZVBB-NEXT: ret
444 %a = call <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32> %va)
445 ret <vscale x 1 x i32> %a
447 declare <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32>)
; ctpop on nxv2i32 (m1): same 32-bit popcount expansion; Zvbb emits vcpop.v.
449 define <vscale x 2 x i32> @ctpop_nxv2i32(<vscale x 2 x i32> %va) {
450 ; CHECK-LABEL: ctpop_nxv2i32:
452 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
453 ; CHECK-NEXT: vsrl.vi v9, v8, 1
454 ; CHECK-NEXT: lui a0, 349525
455 ; CHECK-NEXT: addi a0, a0, 1365
456 ; CHECK-NEXT: vand.vx v9, v9, a0
457 ; CHECK-NEXT: vsub.vv v8, v8, v9
458 ; CHECK-NEXT: lui a0, 209715
459 ; CHECK-NEXT: addi a0, a0, 819
460 ; CHECK-NEXT: vand.vx v9, v8, a0
461 ; CHECK-NEXT: vsrl.vi v8, v8, 2
462 ; CHECK-NEXT: vand.vx v8, v8, a0
463 ; CHECK-NEXT: vadd.vv v8, v9, v8
464 ; CHECK-NEXT: vsrl.vi v9, v8, 4
465 ; CHECK-NEXT: vadd.vv v8, v8, v9
466 ; CHECK-NEXT: lui a0, 61681
467 ; CHECK-NEXT: addi a0, a0, -241
468 ; CHECK-NEXT: vand.vx v8, v8, a0
469 ; CHECK-NEXT: lui a0, 4112
470 ; CHECK-NEXT: addi a0, a0, 257
471 ; CHECK-NEXT: vmul.vx v8, v8, a0
472 ; CHECK-NEXT: vsrl.vi v8, v8, 24
475 ; CHECK-ZVBB-LABEL: ctpop_nxv2i32:
476 ; CHECK-ZVBB: # %bb.0:
477 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
478 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
479 ; CHECK-ZVBB-NEXT: ret
480 %a = call <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32> %va)
481 ret <vscale x 2 x i32> %a
483 declare <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32>)
; ctpop on nxv4i32 (m2): scratch register v10; same 32-bit expansion;
; Zvbb emits vcpop.v.
485 define <vscale x 4 x i32> @ctpop_nxv4i32(<vscale x 4 x i32> %va) {
486 ; CHECK-LABEL: ctpop_nxv4i32:
488 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
489 ; CHECK-NEXT: vsrl.vi v10, v8, 1
490 ; CHECK-NEXT: lui a0, 349525
491 ; CHECK-NEXT: addi a0, a0, 1365
492 ; CHECK-NEXT: vand.vx v10, v10, a0
493 ; CHECK-NEXT: vsub.vv v8, v8, v10
494 ; CHECK-NEXT: lui a0, 209715
495 ; CHECK-NEXT: addi a0, a0, 819
496 ; CHECK-NEXT: vand.vx v10, v8, a0
497 ; CHECK-NEXT: vsrl.vi v8, v8, 2
498 ; CHECK-NEXT: vand.vx v8, v8, a0
499 ; CHECK-NEXT: vadd.vv v8, v10, v8
500 ; CHECK-NEXT: vsrl.vi v10, v8, 4
501 ; CHECK-NEXT: vadd.vv v8, v8, v10
502 ; CHECK-NEXT: lui a0, 61681
503 ; CHECK-NEXT: addi a0, a0, -241
504 ; CHECK-NEXT: vand.vx v8, v8, a0
505 ; CHECK-NEXT: lui a0, 4112
506 ; CHECK-NEXT: addi a0, a0, 257
507 ; CHECK-NEXT: vmul.vx v8, v8, a0
508 ; CHECK-NEXT: vsrl.vi v8, v8, 24
511 ; CHECK-ZVBB-LABEL: ctpop_nxv4i32:
512 ; CHECK-ZVBB: # %bb.0:
513 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m2, ta, ma
514 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
515 ; CHECK-ZVBB-NEXT: ret
516 %a = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> %va)
517 ret <vscale x 4 x i32> %a
519 declare <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32>)
; ctpop on nxv8i32 (m4): scratch register v12; same 32-bit expansion;
; Zvbb emits vcpop.v.
521 define <vscale x 8 x i32> @ctpop_nxv8i32(<vscale x 8 x i32> %va) {
522 ; CHECK-LABEL: ctpop_nxv8i32:
524 ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
525 ; CHECK-NEXT: vsrl.vi v12, v8, 1
526 ; CHECK-NEXT: lui a0, 349525
527 ; CHECK-NEXT: addi a0, a0, 1365
528 ; CHECK-NEXT: vand.vx v12, v12, a0
529 ; CHECK-NEXT: vsub.vv v8, v8, v12
530 ; CHECK-NEXT: lui a0, 209715
531 ; CHECK-NEXT: addi a0, a0, 819
532 ; CHECK-NEXT: vand.vx v12, v8, a0
533 ; CHECK-NEXT: vsrl.vi v8, v8, 2
534 ; CHECK-NEXT: vand.vx v8, v8, a0
535 ; CHECK-NEXT: vadd.vv v8, v12, v8
536 ; CHECK-NEXT: vsrl.vi v12, v8, 4
537 ; CHECK-NEXT: vadd.vv v8, v8, v12
538 ; CHECK-NEXT: lui a0, 61681
539 ; CHECK-NEXT: addi a0, a0, -241
540 ; CHECK-NEXT: vand.vx v8, v8, a0
541 ; CHECK-NEXT: lui a0, 4112
542 ; CHECK-NEXT: addi a0, a0, 257
543 ; CHECK-NEXT: vmul.vx v8, v8, a0
544 ; CHECK-NEXT: vsrl.vi v8, v8, 24
547 ; CHECK-ZVBB-LABEL: ctpop_nxv8i32:
548 ; CHECK-ZVBB: # %bb.0:
549 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m4, ta, ma
550 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
551 ; CHECK-ZVBB-NEXT: ret
552 %a = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> %va)
553 ret <vscale x 8 x i32> %a
555 declare <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32>)
; ctpop on nxv16i32 (m8): scratch register v16; same 32-bit expansion;
; Zvbb emits vcpop.v.
557 define <vscale x 16 x i32> @ctpop_nxv16i32(<vscale x 16 x i32> %va) {
558 ; CHECK-LABEL: ctpop_nxv16i32:
560 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
561 ; CHECK-NEXT: vsrl.vi v16, v8, 1
562 ; CHECK-NEXT: lui a0, 349525
563 ; CHECK-NEXT: addi a0, a0, 1365
564 ; CHECK-NEXT: vand.vx v16, v16, a0
565 ; CHECK-NEXT: vsub.vv v8, v8, v16
566 ; CHECK-NEXT: lui a0, 209715
567 ; CHECK-NEXT: addi a0, a0, 819
568 ; CHECK-NEXT: vand.vx v16, v8, a0
569 ; CHECK-NEXT: vsrl.vi v8, v8, 2
570 ; CHECK-NEXT: vand.vx v8, v8, a0
571 ; CHECK-NEXT: vadd.vv v8, v16, v8
572 ; CHECK-NEXT: vsrl.vi v16, v8, 4
573 ; CHECK-NEXT: vadd.vv v8, v8, v16
574 ; CHECK-NEXT: lui a0, 61681
575 ; CHECK-NEXT: addi a0, a0, -241
576 ; CHECK-NEXT: vand.vx v8, v8, a0
577 ; CHECK-NEXT: lui a0, 4112
578 ; CHECK-NEXT: addi a0, a0, 257
579 ; CHECK-NEXT: vmul.vx v8, v8, a0
580 ; CHECK-NEXT: vsrl.vi v8, v8, 24
583 ; CHECK-ZVBB-LABEL: ctpop_nxv16i32:
584 ; CHECK-ZVBB: # %bb.0:
585 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma
586 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
587 ; CHECK-ZVBB-NEXT: ret
588 %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va)
589 ret <vscale x 16 x i32> %a
; ctpop(x) < 2 is recognized as a power-of-two-or-zero test: the base
; lowering folds it to (x & (x-1)) == 0 with no popcount at all, while the
; Zvbb path still counts bits and compares unsigned-<= 1.
592 ; We always emit vcpop.v for the scalable vector
593 define <vscale x 16 x i1> @ctpop_nxv16i32_ult_two(<vscale x 16 x i32> %va) {
594 ; CHECK-LABEL: ctpop_nxv16i32_ult_two:
596 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
597 ; CHECK-NEXT: vadd.vi v16, v8, -1
598 ; CHECK-NEXT: vand.vv v8, v8, v16
599 ; CHECK-NEXT: vmseq.vi v0, v8, 0
602 ; CHECK-ZVBB-LABEL: ctpop_nxv16i32_ult_two:
603 ; CHECK-ZVBB: # %bb.0:
604 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma
605 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
606 ; CHECK-ZVBB-NEXT: vmsleu.vi v0, v8, 1
607 ; CHECK-ZVBB-NEXT: ret
608 %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va)
609 %cmp = icmp ult <vscale x 16 x i32> %a, shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> poison, i32 2, i64 0), <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer)
610 ret <vscale x 16 x i1> %cmp
; ctpop(x) > 1 is the negation of the previous test: base lowering folds to
; (x & (x-1)) != 0; Zvbb counts bits and compares unsigned-> 1.
613 define <vscale x 16 x i1> @ctpop_nxv16i32_ugt_one(<vscale x 16 x i32> %va) {
614 ; CHECK-LABEL: ctpop_nxv16i32_ugt_one:
616 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
617 ; CHECK-NEXT: vadd.vi v16, v8, -1
618 ; CHECK-NEXT: vand.vv v8, v8, v16
619 ; CHECK-NEXT: vmsne.vi v0, v8, 0
622 ; CHECK-ZVBB-LABEL: ctpop_nxv16i32_ugt_one:
623 ; CHECK-ZVBB: # %bb.0:
624 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma
625 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
626 ; CHECK-ZVBB-NEXT: vmsgtu.vi v0, v8, 1
627 ; CHECK-ZVBB-NEXT: ret
628 %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va)
629 %cmp = icmp ugt <vscale x 16 x i32> %a, shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> poison, i32 1, i64 0), <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer)
630 ret <vscale x 16 x i1> %cmp
; ctpop(x) == 1 is an exact power-of-two test: base lowering computes
; ((x & (x-1)) == 0) AND (x != 0) using mask registers; Zvbb counts bits
; and compares equal to 1.
633 define <vscale x 16 x i1> @ctpop_nxv16i32_eq_one(<vscale x 16 x i32> %va) {
634 ; CHECK-LABEL: ctpop_nxv16i32_eq_one:
636 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
637 ; CHECK-NEXT: vadd.vi v16, v8, -1
638 ; CHECK-NEXT: vand.vv v16, v8, v16
639 ; CHECK-NEXT: vmseq.vi v24, v16, 0
640 ; CHECK-NEXT: vmsne.vi v16, v8, 0
641 ; CHECK-NEXT: vmand.mm v0, v16, v24
644 ; CHECK-ZVBB-LABEL: ctpop_nxv16i32_eq_one:
645 ; CHECK-ZVBB: # %bb.0:
646 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma
647 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
648 ; CHECK-ZVBB-NEXT: vmseq.vi v0, v8, 1
649 ; CHECK-ZVBB-NEXT: ret
650 %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va)
651 %cmp = icmp eq <vscale x 16 x i32> %a, shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> poison, i32 1, i64 0), <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer)
652 ret <vscale x 16 x i1> %cmp
; ctpop(x) != 1 — De Morgan of the eq-one case: base lowering computes
; ((x & (x-1)) != 0) OR (x == 0); Zvbb counts bits and compares not-equal 1.
655 define <vscale x 16 x i1> @ctpop_nxv16i32_ne_one(<vscale x 16 x i32> %va) {
656 ; CHECK-LABEL: ctpop_nxv16i32_ne_one:
658 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
659 ; CHECK-NEXT: vadd.vi v16, v8, -1
660 ; CHECK-NEXT: vand.vv v16, v8, v16
661 ; CHECK-NEXT: vmsne.vi v24, v16, 0
662 ; CHECK-NEXT: vmseq.vi v16, v8, 0
663 ; CHECK-NEXT: vmor.mm v0, v16, v24
666 ; CHECK-ZVBB-LABEL: ctpop_nxv16i32_ne_one:
667 ; CHECK-ZVBB: # %bb.0:
668 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma
669 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
670 ; CHECK-ZVBB-NEXT: vmsne.vi v0, v8, 1
671 ; CHECK-ZVBB-NEXT: ret
672 %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va)
673 %cmp = icmp ne <vscale x 16 x i32> %a, shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> poison, i32 1, i64 0), <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer)
674 ret <vscale x 16 x i1> %cmp
677 declare <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32>)
679 define <vscale x 1 x i64> @ctpop_nxv1i64(<vscale x 1 x i64> %va) {
680 ; RV32-LABEL: ctpop_nxv1i64:
682 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
683 ; RV32-NEXT: vsrl.vi v9, v8, 1
684 ; RV32-NEXT: lui a0, 349525
685 ; RV32-NEXT: addi a0, a0, 1365
686 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
687 ; RV32-NEXT: vmv.v.x v10, a0
688 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
689 ; RV32-NEXT: vand.vv v9, v9, v10
690 ; RV32-NEXT: vsub.vv v8, v8, v9
691 ; RV32-NEXT: lui a0, 209715
692 ; RV32-NEXT: addi a0, a0, 819
693 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
694 ; RV32-NEXT: vmv.v.x v9, a0
695 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
696 ; RV32-NEXT: vand.vv v10, v8, v9
697 ; RV32-NEXT: vsrl.vi v8, v8, 2
698 ; RV32-NEXT: vand.vv v8, v8, v9
699 ; RV32-NEXT: vadd.vv v8, v10, v8
700 ; RV32-NEXT: vsrl.vi v9, v8, 4
701 ; RV32-NEXT: vadd.vv v8, v8, v9
702 ; RV32-NEXT: lui a0, 61681
703 ; RV32-NEXT: addi a0, a0, -241
704 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
705 ; RV32-NEXT: vmv.v.x v9, a0
706 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
707 ; RV32-NEXT: vand.vv v8, v8, v9
708 ; RV32-NEXT: lui a0, 4112
709 ; RV32-NEXT: addi a0, a0, 257
710 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
711 ; RV32-NEXT: vmv.v.x v9, a0
712 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
713 ; RV32-NEXT: vmul.vv v8, v8, v9
714 ; RV32-NEXT: li a0, 56
715 ; RV32-NEXT: vsrl.vx v8, v8, a0
718 ; RV64-LABEL: ctpop_nxv1i64:
720 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
721 ; RV64-NEXT: vsrl.vi v9, v8, 1
722 ; RV64-NEXT: lui a0, 349525
723 ; RV64-NEXT: addiw a0, a0, 1365
724 ; RV64-NEXT: slli a1, a0, 32
725 ; RV64-NEXT: add a0, a0, a1
726 ; RV64-NEXT: vand.vx v9, v9, a0
727 ; RV64-NEXT: vsub.vv v8, v8, v9
728 ; RV64-NEXT: lui a0, 209715
729 ; RV64-NEXT: addiw a0, a0, 819
730 ; RV64-NEXT: slli a1, a0, 32
731 ; RV64-NEXT: add a0, a0, a1
732 ; RV64-NEXT: vand.vx v9, v8, a0
733 ; RV64-NEXT: vsrl.vi v8, v8, 2
734 ; RV64-NEXT: vand.vx v8, v8, a0
735 ; RV64-NEXT: vadd.vv v8, v9, v8
736 ; RV64-NEXT: vsrl.vi v9, v8, 4
737 ; RV64-NEXT: vadd.vv v8, v8, v9
738 ; RV64-NEXT: lui a0, 61681
739 ; RV64-NEXT: addiw a0, a0, -241
740 ; RV64-NEXT: slli a1, a0, 32
741 ; RV64-NEXT: add a0, a0, a1
742 ; RV64-NEXT: vand.vx v8, v8, a0
743 ; RV64-NEXT: lui a0, 4112
744 ; RV64-NEXT: addiw a0, a0, 257
745 ; RV64-NEXT: slli a1, a0, 32
746 ; RV64-NEXT: add a0, a0, a1
747 ; RV64-NEXT: vmul.vx v8, v8, a0
748 ; RV64-NEXT: li a0, 56
749 ; RV64-NEXT: vsrl.vx v8, v8, a0
752 ; CHECK-ZVBB-LABEL: ctpop_nxv1i64:
753 ; CHECK-ZVBB: # %bb.0:
754 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m1, ta, ma
755 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
756 ; CHECK-ZVBB-NEXT: ret
757 %a = call <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64> %va)
758 ret <vscale x 1 x i64> %a
760 declare <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64>)
; ctpop on nxv2i64 (m2): same split RV32/RV64 i64 lowering as nxv1i64, with
; scratch registers v10/v12 at LMUL=2; Zvbb emits a single vcpop.v.
762 define <vscale x 2 x i64> @ctpop_nxv2i64(<vscale x 2 x i64> %va) {
763 ; RV32-LABEL: ctpop_nxv2i64:
765 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
766 ; RV32-NEXT: vsrl.vi v10, v8, 1
767 ; RV32-NEXT: lui a0, 349525
768 ; RV32-NEXT: addi a0, a0, 1365
769 ; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
770 ; RV32-NEXT: vmv.v.x v12, a0
771 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
772 ; RV32-NEXT: vand.vv v10, v10, v12
773 ; RV32-NEXT: vsub.vv v8, v8, v10
774 ; RV32-NEXT: lui a0, 209715
775 ; RV32-NEXT: addi a0, a0, 819
776 ; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
777 ; RV32-NEXT: vmv.v.x v10, a0
778 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
779 ; RV32-NEXT: vand.vv v12, v8, v10
780 ; RV32-NEXT: vsrl.vi v8, v8, 2
781 ; RV32-NEXT: vand.vv v8, v8, v10
782 ; RV32-NEXT: vadd.vv v8, v12, v8
783 ; RV32-NEXT: vsrl.vi v10, v8, 4
784 ; RV32-NEXT: vadd.vv v8, v8, v10
785 ; RV32-NEXT: lui a0, 61681
786 ; RV32-NEXT: addi a0, a0, -241
787 ; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
788 ; RV32-NEXT: vmv.v.x v10, a0
789 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
790 ; RV32-NEXT: vand.vv v8, v8, v10
791 ; RV32-NEXT: lui a0, 4112
792 ; RV32-NEXT: addi a0, a0, 257
793 ; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
794 ; RV32-NEXT: vmv.v.x v10, a0
795 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
796 ; RV32-NEXT: vmul.vv v8, v8, v10
797 ; RV32-NEXT: li a0, 56
798 ; RV32-NEXT: vsrl.vx v8, v8, a0
801 ; RV64-LABEL: ctpop_nxv2i64:
803 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
804 ; RV64-NEXT: vsrl.vi v10, v8, 1
805 ; RV64-NEXT: lui a0, 349525
806 ; RV64-NEXT: addiw a0, a0, 1365
807 ; RV64-NEXT: slli a1, a0, 32
808 ; RV64-NEXT: add a0, a0, a1
809 ; RV64-NEXT: vand.vx v10, v10, a0
810 ; RV64-NEXT: vsub.vv v8, v8, v10
811 ; RV64-NEXT: lui a0, 209715
812 ; RV64-NEXT: addiw a0, a0, 819
813 ; RV64-NEXT: slli a1, a0, 32
814 ; RV64-NEXT: add a0, a0, a1
815 ; RV64-NEXT: vand.vx v10, v8, a0
816 ; RV64-NEXT: vsrl.vi v8, v8, 2
817 ; RV64-NEXT: vand.vx v8, v8, a0
818 ; RV64-NEXT: vadd.vv v8, v10, v8
819 ; RV64-NEXT: vsrl.vi v10, v8, 4
820 ; RV64-NEXT: vadd.vv v8, v8, v10
821 ; RV64-NEXT: lui a0, 61681
822 ; RV64-NEXT: addiw a0, a0, -241
823 ; RV64-NEXT: slli a1, a0, 32
824 ; RV64-NEXT: add a0, a0, a1
825 ; RV64-NEXT: vand.vx v8, v8, a0
826 ; RV64-NEXT: lui a0, 4112
827 ; RV64-NEXT: addiw a0, a0, 257
828 ; RV64-NEXT: slli a1, a0, 32
829 ; RV64-NEXT: add a0, a0, a1
830 ; RV64-NEXT: vmul.vx v8, v8, a0
831 ; RV64-NEXT: li a0, 56
832 ; RV64-NEXT: vsrl.vx v8, v8, a0
835 ; CHECK-ZVBB-LABEL: ctpop_nxv2i64:
836 ; CHECK-ZVBB: # %bb.0:
837 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m2, ta, ma
838 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
839 ; CHECK-ZVBB-NEXT: ret
840 %a = call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> %va)
841 ret <vscale x 2 x i64> %a
843 declare <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64>)
; ctpop on nxv4i64 (m4): same split RV32/RV64 i64 lowering, with scratch
; registers v12/v16 at LMUL=4; Zvbb emits a single vcpop.v.
845 define <vscale x 4 x i64> @ctpop_nxv4i64(<vscale x 4 x i64> %va) {
846 ; RV32-LABEL: ctpop_nxv4i64:
848 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
849 ; RV32-NEXT: vsrl.vi v12, v8, 1
850 ; RV32-NEXT: lui a0, 349525
851 ; RV32-NEXT: addi a0, a0, 1365
852 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
853 ; RV32-NEXT: vmv.v.x v16, a0
854 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
855 ; RV32-NEXT: vand.vv v12, v12, v16
856 ; RV32-NEXT: vsub.vv v8, v8, v12
857 ; RV32-NEXT: lui a0, 209715
858 ; RV32-NEXT: addi a0, a0, 819
859 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
860 ; RV32-NEXT: vmv.v.x v12, a0
861 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
862 ; RV32-NEXT: vand.vv v16, v8, v12
863 ; RV32-NEXT: vsrl.vi v8, v8, 2
864 ; RV32-NEXT: vand.vv v8, v8, v12
865 ; RV32-NEXT: vadd.vv v8, v16, v8
866 ; RV32-NEXT: vsrl.vi v12, v8, 4
867 ; RV32-NEXT: vadd.vv v8, v8, v12
868 ; RV32-NEXT: lui a0, 61681
869 ; RV32-NEXT: addi a0, a0, -241
870 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
871 ; RV32-NEXT: vmv.v.x v12, a0
872 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
873 ; RV32-NEXT: vand.vv v8, v8, v12
874 ; RV32-NEXT: lui a0, 4112
875 ; RV32-NEXT: addi a0, a0, 257
876 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
877 ; RV32-NEXT: vmv.v.x v12, a0
878 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
879 ; RV32-NEXT: vmul.vv v8, v8, v12
880 ; RV32-NEXT: li a0, 56
881 ; RV32-NEXT: vsrl.vx v8, v8, a0
884 ; RV64-LABEL: ctpop_nxv4i64:
886 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
887 ; RV64-NEXT: vsrl.vi v12, v8, 1
888 ; RV64-NEXT: lui a0, 349525
889 ; RV64-NEXT: addiw a0, a0, 1365
890 ; RV64-NEXT: slli a1, a0, 32
891 ; RV64-NEXT: add a0, a0, a1
892 ; RV64-NEXT: vand.vx v12, v12, a0
893 ; RV64-NEXT: vsub.vv v8, v8, v12
894 ; RV64-NEXT: lui a0, 209715
895 ; RV64-NEXT: addiw a0, a0, 819
896 ; RV64-NEXT: slli a1, a0, 32
897 ; RV64-NEXT: add a0, a0, a1
898 ; RV64-NEXT: vand.vx v12, v8, a0
899 ; RV64-NEXT: vsrl.vi v8, v8, 2
900 ; RV64-NEXT: vand.vx v8, v8, a0
901 ; RV64-NEXT: vadd.vv v8, v12, v8
902 ; RV64-NEXT: vsrl.vi v12, v8, 4
903 ; RV64-NEXT: vadd.vv v8, v8, v12
904 ; RV64-NEXT: lui a0, 61681
905 ; RV64-NEXT: addiw a0, a0, -241
906 ; RV64-NEXT: slli a1, a0, 32
907 ; RV64-NEXT: add a0, a0, a1
908 ; RV64-NEXT: vand.vx v8, v8, a0
909 ; RV64-NEXT: lui a0, 4112
910 ; RV64-NEXT: addiw a0, a0, 257
911 ; RV64-NEXT: slli a1, a0, 32
912 ; RV64-NEXT: add a0, a0, a1
913 ; RV64-NEXT: vmul.vx v8, v8, a0
914 ; RV64-NEXT: li a0, 56
915 ; RV64-NEXT: vsrl.vx v8, v8, a0
918 ; CHECK-ZVBB-LABEL: ctpop_nxv4i64:
919 ; CHECK-ZVBB: # %bb.0:
920 ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m4, ta, ma
921 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
922 ; CHECK-ZVBB-NEXT: ret
923 %a = call <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64> %va)
924 ret <vscale x 4 x i64> %a
926 declare <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64>)
; Generic (no Zvbb) lowering is the bit-twiddling SWAR popcount:
; v - ((v >> 1) & 0x5555...), 2-bit sums masked with 0x3333..., nibble
; fold, mask with 0x0F0F..., then multiply by 0x0101... and shift right
; by 56 to gather the byte sums into the top byte.
; RV32 has no 64-bit scalar register, so each 64-bit splat constant is
; built by writing the repeating 32-bit pattern with vmv.v.x under a
; temporary e32 vsetvli before switching back to e64; RV64 materializes
; the full 64-bit constant in a0 via lui/addiw/slli/add instead.
define <vscale x 8 x i64> @ctpop_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: ctpop_nxv8i64:
; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: lui a0, 349525
; RV32-NEXT: addi a0, a0, 1365
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a0
; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vsub.vv v8, v8, v16
; RV32-NEXT: lui a0, 209715
; RV32-NEXT: addi a0, a0, 819
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v16, a0
; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT: vand.vv v24, v8, v16
; RV32-NEXT: vsrl.vi v8, v8, 2
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vadd.vv v8, v24, v8
; RV32-NEXT: vsrl.vi v16, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v16
; RV32-NEXT: lui a0, 61681
; RV32-NEXT: addi a0, a0, -241
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v16, a0
; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: lui a0, 4112
; RV32-NEXT: addi a0, a0, 257
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v16, a0
; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v16
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
; RV64-LABEL: ctpop_nxv8i64:
; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v16, v8, 1
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v16, v16, a0
; RV64-NEXT: vsub.vv v8, v8, v16
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v16, v8, a0
; RV64-NEXT: vsrl.vi v8, v8, 2
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vadd.vv v8, v16, v8
; RV64-NEXT: vsrl.vi v16, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v16
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: lui a0, 4112
; RV64-NEXT: addiw a0, a0, 257
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vmul.vx v8, v8, a0
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0
; CHECK-ZVBB-LABEL: ctpop_nxv8i64:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vcpop.v v8, v8
; CHECK-ZVBB-NEXT: ret
%a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
ret <vscale x 8 x i64> %a
; For scalable vectors, the Zvbb lowering always emits vcpop.v, even when
; the popcount result only feeds a comparison.
; (ctpop(x) < 2) without Zvbb never computes a popcount: it is folded to
; the power-of-two-or-zero test (x & (x-1)) == 0. With Zvbb the actual
; vcpop.v result is compared via vmsleu.vi against 1.
define <vscale x 8 x i1> @ctpop_nxv8i64_ult_two(<vscale x 8 x i64> %va) {
; CHECK-LABEL: ctpop_nxv8i64_ult_two:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vadd.vi v16, v8, -1
; CHECK-NEXT: vand.vv v8, v8, v16
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-ZVBB-LABEL: ctpop_nxv8i64_ult_two:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vcpop.v v8, v8
; CHECK-ZVBB-NEXT: vmsleu.vi v0, v8, 1
; CHECK-ZVBB-NEXT: ret
%a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
%cmp = icmp ult <vscale x 8 x i64> %a, shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 2, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
ret <vscale x 8 x i1> %cmp
; (ctpop(x) > 1) is the negation of the case above: without Zvbb it folds
; to (x & (x-1)) != 0; with Zvbb the vcpop.v result is compared with
; vmsgtu.vi against 1.
define <vscale x 8 x i1> @ctpop_nxv8i64_ugt_one(<vscale x 8 x i64> %va) {
; CHECK-LABEL: ctpop_nxv8i64_ugt_one:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vadd.vi v16, v8, -1
; CHECK-NEXT: vand.vv v8, v8, v16
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-ZVBB-LABEL: ctpop_nxv8i64_ugt_one:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vcpop.v v8, v8
; CHECK-ZVBB-NEXT: vmsgtu.vi v0, v8, 1
; CHECK-ZVBB-NEXT: ret
%a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
%cmp = icmp ugt <vscale x 8 x i64> %a, shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
ret <vscale x 8 x i1> %cmp
; (ctpop(x) == 1) means exactly one bit set. Without Zvbb this becomes
; ((x & (x-1)) == 0) AND (x != 0), with the two mask results combined by
; vmand.mm; with Zvbb the vcpop.v result is tested for equality with 1.
define <vscale x 8 x i1> @ctpop_nxv8i64_eq_one(<vscale x 8 x i64> %va) {
; CHECK-LABEL: ctpop_nxv8i64_eq_one:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vadd.vi v16, v8, -1
; CHECK-NEXT: vand.vv v16, v8, v16
; CHECK-NEXT: vmseq.vi v24, v16, 0
; CHECK-NEXT: vmsne.vi v16, v8, 0
; CHECK-NEXT: vmand.mm v0, v16, v24
; CHECK-ZVBB-LABEL: ctpop_nxv8i64_eq_one:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vcpop.v v8, v8
; CHECK-ZVBB-NEXT: vmseq.vi v0, v8, 1
; CHECK-ZVBB-NEXT: ret
%a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
%cmp = icmp eq <vscale x 8 x i64> %a, shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
ret <vscale x 8 x i1> %cmp
; (ctpop(x) != 1) is the negation of the exactly-one-bit test: without
; Zvbb it becomes ((x & (x-1)) != 0) OR (x == 0), combined by vmor.mm;
; with Zvbb the vcpop.v result is tested for inequality with 1.
define <vscale x 8 x i1> @ctpop_nxv8i64_ne_one(<vscale x 8 x i64> %va) {
; CHECK-LABEL: ctpop_nxv8i64_ne_one:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vadd.vi v16, v8, -1
; CHECK-NEXT: vand.vv v16, v8, v16
; CHECK-NEXT: vmsne.vi v24, v16, 0
; CHECK-NEXT: vmseq.vi v16, v8, 0
; CHECK-NEXT: vmor.mm v0, v16, v24
; CHECK-ZVBB-LABEL: ctpop_nxv8i64_ne_one:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vcpop.v v8, v8
; CHECK-ZVBB-NEXT: vmsne.vi v0, v8, 1
; CHECK-ZVBB-NEXT: ret
%a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
%cmp = icmp ne <vscale x 8 x i64> %a, shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
ret <vscale x 8 x i1> %cmp
1095 declare <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64>)