; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
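
; The lowering checked below expands llvm.vp.ctpop.* with the classic SWAR
; population-count sequence, applied per element under the intrinsic's EVL
; and mask:
;   x -= (x >> 1) & 0x55...                   ; 2-bit field sums
;   x = (x & 0x33...) + ((x >> 2) & 0x33...)  ; 4-bit field sums
;   x = (x + (x >> 4)) & 0x0f...              ; byte sums
; For i8 elements the byte sum is the result; wider elements finish with a
; multiply by 0x0101... and a right shift that brings the top byte down.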

declare <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8>, <2 x i1>, i32)

define <2 x i8> @vp_ctpop_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %v
}

define <2 x i8> @vp_ctpop_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i8> %v
}

declare <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8>, <4 x i1>, i32)

define <4 x i8> @vp_ctpop_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %v
}

define <4 x i8> @vp_ctpop_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %v
}

declare <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8>, <8 x i1>, i32)

define <8 x i8> @vp_ctpop_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}

define <8 x i8> @vp_ctpop_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i8> %v
}

declare <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8>, <16 x i1>, i32)

define <16 x i8> @vp_ctpop_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i8> %v
}

define <16 x i8> @vp_ctpop_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i8> %v
}

declare <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16>, <2 x i1>, i32)
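
; From i16 up, the masks no longer fit an li immediate and are built with
; lui+addi (0x5555, 0x3333, 0x0f0f here), and the byte sums are combined by
; multiplying with 0x0101 (li a0, 257) and shifting right by 8.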

define <2 x i16> @vp_ctpop_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i16> %v
}

define <2 x i16> @vp_ctpop_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i16> %v
}

declare <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16>, <4 x i1>, i32)

define <4 x i16> @vp_ctpop_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i16> %v
}

define <4 x i16> @vp_ctpop_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i16> %v
}

declare <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16>, <8 x i1>, i32)

define <8 x i16> @vp_ctpop_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

define <8 x i16> @vp_ctpop_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %v
}

declare <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16>, <16 x i1>, i32)

define <16 x i16> @vp_ctpop_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i16> %v
}

define <16 x i16> @vp_ctpop_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i16> %v
}

declare <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32>, <2 x i1>, i32)
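
; The i32 variants use the same masks widened to 0x55555555, 0x33333333,
; 0x0f0f0f0f and 0x01010101, with the final shift widened to 24.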

define <2 x i32> @vp_ctpop_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i32> %v
}

define <2 x i32> @vp_ctpop_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i32> %v
}

declare <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32>, <4 x i1>, i32)

define <4 x i32> @vp_ctpop_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %v
}

define <4 x i32> @vp_ctpop_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i32> %v
}

declare <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32>, <8 x i1>, i32)

define <8 x i32> @vp_ctpop_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vp_ctpop_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i32> %v
}

declare <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32>, <16 x i1>, i32)

define <16 x i32> @vp_ctpop_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i32> %v
}

define <16 x i32> @vp_ctpop_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i32> %v
}

declare <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64>, <2 x i1>, i32)
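
; For i64 elements the two targets diverge: RV64 materializes each 64-bit
; mask in a scalar register (lui+addiw, then slli+add to replicate the low
; 32 bits) and keeps the .vx instruction forms, while RV32 has no 64-bit
; GPRs and instead splats the 32-bit pattern across the register at e32
; (vmv.v.x under a temporary vsetivli) and switches to the .vv forms.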

define <2 x i64> @vp_ctpop_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV32-NEXT:    vand.vv v9, v8, v10, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV64-NEXT:    vand.vx v9, v9, a1, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV64-NEXT:    vand.vx v9, v8, a2, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %v
}

define <2 x i64> @vp_ctpop_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v2i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    vand.vv v9, v8, v10
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vmul.vv v8, v8, v9
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v2i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v9, v9, a1
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    vand.vx v9, v8, a2
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i64> %v
}

declare <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64>, <4 x i1>, i32)

define <4 x i64> @vp_ctpop_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV32-NEXT:    vand.vv v10, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV64-NEXT:    vand.vx v10, v10, a1, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV64-NEXT:    vand.vx v10, v8, a2, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i64> %v
}

define <4 x i64> @vp_ctpop_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v4i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v10, v10, v12
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v10
; RV32-NEXT:    vand.vv v10, v8, v12
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v10
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v4i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v10, v10, a1
; RV64-NEXT:    vsub.vv v8, v8, v10
; RV64-NEXT:    vand.vx v10, v8, a2
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vadd.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v10
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %v
}

declare <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64>, <8 x i1>, i32)

define <8 x i64> @vp_ctpop_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v16, v12, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV64-NEXT:    vand.vx v12, v12, a1, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
; RV64-NEXT:    vand.vx v12, v8, a2, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vp_ctpop_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v8i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v12
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v8i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a1
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    vand.vx v12, v8, a2
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i64> %v
}

declare <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64>, <15 x i1>, i32)
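
; At m8 register pressure the masked RV32 lowering can no longer keep all
; the splatted masks live: the 32-bit patterns are stored to the stack and
; rebroadcast with zero-strided loads (vlse64.v ..., zero), with vs8r.v/vl8r.v
; spills of the working vectors in between.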

define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v15i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 32(sp)
; RV32-NEXT:    sw a1, 36(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
; RV32-NEXT:    addi a1, sp, 32
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vsub.vv v8, v8, v24, v0.t
; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v24, v24, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v24, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v15i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a2, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64> %va, <15 x i1> %m, i32 %evl)
  ret <15 x i64> %v
}

define <15 x i64> @vp_ctpop_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v15i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    sw a1, 4(sp)
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v0, (a1), zero
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v0, v16, v0
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v0
; RV32-NEXT:    vand.vv v0, v8, v24
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v8, v0, v8
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v0
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v15i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a1
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a2
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64> %va, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x i64> %v
}

declare <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64>, <16 x i1>, i32)

define <16 x i64> @vp_ctpop_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 32(sp)
; RV32-NEXT:    sw a1, 36(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
; RV32-NEXT:    addi a1, sp, 32
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vsub.vv v8, v8, v24, v0.t
; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v24, v24, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v24, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a2, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i64> %v
}

define <16 x i64> @vp_ctpop_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v16i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    sw a1, 4(sp)
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v0, (a1), zero
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v0, v16, v0
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v0
; RV32-NEXT:    vand.vv v0, v8, v24
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v8, v0, v8
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v0
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v16i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a1
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a2
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i64> %v
}

declare <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64>, <32 x i1>, i32)
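
; 32 x i64 does not fit in a single m8 group, so the operation is split in
; two: the first half runs with the EVL clamped to 16 (bltu + li), the second
; with the branch-free remainder max(evl - 16, 0) (sltu/addi/and), using the
; upper mask bits obtained by vslidedown.vi from v0.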
1517 define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
1518 ; RV32-LABEL: vp_ctpop_v32i64:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 48
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v7, v0, 2
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 32(sp)
; RV32-NEXT:    sw a2, 36(sp)
; RV32-NEXT:    lui a2, 4112
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    addi a1, a2, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    mv a1, a0
; RV32-NEXT:    bltu a0, a3, .LBB34_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a1, 16
; RV32-NEXT:  .LBB34_2:
; RV32-NEXT:    addi a2, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a2), zero
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    mul a2, a2, a3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    addi a2, sp, 32
; RV32-NEXT:    vlse64.v v16, (a2), zero
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 5
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v24, v8, 1, v0.t
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    mul a2, a2, a3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v24, v16, v0.t
; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 5
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    li a3, 24
; RV32-NEXT:    mul a2, a2, a3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    li a3, 24
; RV32-NEXT:    mul a2, a2, a3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    addi a2, sp, 24
; RV32-NEXT:    addi a3, sp, 16
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a2), zero
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    vlse64.v v8, (a3), zero
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    li a3, 24
; RV32-NEXT:    mul a2, a2, a3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 24
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vmul.vv v8, v16, v8, v0.t
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    addi a2, a0, -16
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    and a0, a0, a2
; RV32-NEXT:    vmv1r.v v0, v7
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 4
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v24, v16, 1, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    mul a0, a0, a2
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v24, v8, v0.t
; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 5
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    addi a0, sp, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a2, 24
; RV32-NEXT:    mul a0, a0, a2
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 48
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v32i64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 4
; RV64-NEXT:    sub sp, sp, a1
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v24, v0, 2
; RV64-NEXT:    mv a1, a0
; RV64-NEXT:    bltu a0, a2, .LBB34_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 16
; RV64-NEXT:  .LBB34_2:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a5, a1, a5
; RV64-NEXT:    slli a1, a2, 32
; RV64-NEXT:    add a6, a2, a1
; RV64-NEXT:    slli a1, a3, 32
; RV64-NEXT:    add a1, a3, a1
; RV64-NEXT:    slli a2, a4, 32
; RV64-NEXT:    add a2, a4, a2
; RV64-NEXT:    addi a3, a0, -16
; RV64-NEXT:    sltu a0, a0, a3
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    and a0, a0, a3
; RV64-NEXT:    li a3, 56
; RV64-NEXT:    vand.vx v16, v16, a5, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a6, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a6, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vmul.vx v8, v8, a2, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a3, v0.t
; RV64-NEXT:    addi a4, sp, 16
; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV64-NEXT:    vmv1r.v v0, v24
; RV64-NEXT:    csrr a4, vlenb
; RV64-NEXT:    slli a4, a4, 3
; RV64-NEXT:    add a4, sp, a4
; RV64-NEXT:    addi a4, a4, 16
; RV64-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a5, v0.t
; RV64-NEXT:    vsub.vv v16, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v16, a6, v0.t
; RV64-NEXT:    vsrl.vi v16, v16, 2, v0.t
; RV64-NEXT:    vand.vx v16, v16, a6, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vmul.vx v8, v8, a2, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a3, v0.t
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %v = call <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl)
  ret <32 x i64> %v
}
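
; The two bodies above differ mainly in how the SWAR masks are materialized:
; RV64 builds each 64-bit constant in a scalar register (lui/addiw, then
; slli+add to replicate the low word) and uses the .vx instruction forms,
; while RV32 has no 64-bit scalar registers, so it stores each 32-bit pattern
; twice on the stack and splats it with a zero-stride vlse64.v. The extra
; vector-register pressure from those splatted masks is presumably what drives
; the 48 * vlenb spill area on RV32 versus 16 * vlenb on RV64.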
define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v32i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 24
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
; RV32-NEXT:    vmv8r.v v24, v16
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 32(sp)
; RV32-NEXT:    sw a2, 36(sp)
; RV32-NEXT:    lui a2, 4112
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    addi a1, a2, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    mv a1, a0
; RV32-NEXT:    bltu a0, a3, .LBB35_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a1, 16
; RV32-NEXT:  .LBB35_2:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    addi a2, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a2), zero
; RV32-NEXT:    addi a2, a0, -16
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    and a0, a0, a2
; RV32-NEXT:    addi a2, sp, 32
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v0, v0, v16
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v0, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v0, (a2), zero
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    vmv8r.v v8, v24
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v24, v24, 1
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 4
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v16, v24, v16
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v24
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v16, v0
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vi v16, v16, 2
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v8, v0
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 4
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v16, v16, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v0, v8, v0
; RV32-NEXT:    addi a2, sp, 24
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v16, v8, v16
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a2), zero
; RV32-NEXT:    addi a2, sp, 16
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v24, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v24, v24, v0
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v16, 4
; RV32-NEXT:    vadd.vv v16, v16, v0
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v0, (a2), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v24, 4
; RV32-NEXT:    vadd.vv v16, v24, v16
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 4
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v24, v8
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v8, v16, v8
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vmul.vv v16, v24, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vmul.vv v24, v8, v0
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v8, v16, a2
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v16, v24, a2
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 24
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
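
; Unlike the masked RV32 version, this unmasked lowering interleaves the two
; 16-element halves, toggling vsetvli between the two AVLs (a1 and a0) around
; almost every step; that appears to keep more intermediates live in
; registers, cutting the spill area from 48 * vlenb to 24 * vlenb.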
; RV64-LABEL: vp_ctpop_v32i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    mv a1, a0
; RV64-NEXT:    bltu a0, a2, .LBB35_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 16
; RV64-NEXT:  .LBB35_2:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    addiw a2, a2, 1365
; RV64-NEXT:    addiw a3, a3, 819
; RV64-NEXT:    addiw a4, a4, -241
; RV64-NEXT:    addiw a5, a5, 257
; RV64-NEXT:    slli a6, a2, 32
; RV64-NEXT:    add a2, a2, a6
; RV64-NEXT:    slli a6, a3, 32
; RV64-NEXT:    add a3, a3, a6
; RV64-NEXT:    slli a6, a4, 32
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    slli a6, a5, 32
; RV64-NEXT:    add a5, a5, a6
; RV64-NEXT:    addi a6, a0, -16
; RV64-NEXT:    sltu a0, a0, a6
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    and a0, a0, a6
; RV64-NEXT:    li a6, 56
; RV64-NEXT:    vand.vx v24, v24, a2
; RV64-NEXT:    vsub.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 1
; RV64-NEXT:    vand.vx v24, v24, a2
; RV64-NEXT:    vsub.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v24, v8, a3
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vadd.vv v8, v24, v8
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v24, v16, a3
; RV64-NEXT:    vsrl.vi v16, v16, 2
; RV64-NEXT:    vand.vx v16, v16, a3
; RV64-NEXT:    vadd.vv v16, v24, v16
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 4
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v8, v8, a4
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vadd.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vmul.vx v8, v8, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v16, v16, a4
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vx v8, v8, a6
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vmul.vx v16, v16, a5
; RV64-NEXT:    vsrl.vx v16, v16, a6
; RV64-NEXT:    ret
  %v = call <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64> %va, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x i64> %v
}