; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
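
; These RUN lines do not enable Zvbb (and with it vcpop.v), so @llvm.vp.ctpop
; is expanded to the classic SWAR bit-counting sequence. As a C sketch of what
; each lane computes (i32 shown; the masks, multiplier, and final shift scale
; with the element width):
;   x -= (x >> 1) & 0x55555555;                    /* 2-bit partial counts */
;   x = (x & 0x33333333) + ((x >> 2) & 0x33333333); /* 4-bit partial counts */
;   x = (x + (x >> 4)) & 0x0f0f0f0f;                /* byte counts */
;   x = (x * 0x01010101) >> 24;                     /* sum bytes into top byte */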

declare <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8>, <2 x i1>, i32)

define <2 x i8> @vp_ctpop_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %v
}

define <2 x i8> @vp_ctpop_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i8> %v
}

declare <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8>, <4 x i1>, i32)

define <4 x i8> @vp_ctpop_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %v
}

define <4 x i8> @vp_ctpop_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %v
}

declare <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8>, <8 x i1>, i32)

define <8 x i8> @vp_ctpop_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}

define <8 x i8> @vp_ctpop_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i8> %v
}

declare <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8>, <16 x i1>, i32)

define <16 x i8> @vp_ctpop_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i8> %v
}

define <16 x i8> @vp_ctpop_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i8> %v
}
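
; For e8 the count already sits in the low nibble, so a final vand.vi with 15
; finishes the expansion. For e16 and wider the 0x55../0x33../0x0f.. masks no
; longer fit in a single li, so they are materialized with lui/addi pairs, and
; the byte counts are summed with a multiply by the 0x0101.. pattern followed
; by a right shift.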

declare <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16>, <2 x i1>, i32)

define <2 x i16> @vp_ctpop_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i16> %v
}

define <2 x i16> @vp_ctpop_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i16> %v
}

declare <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16>, <4 x i1>, i32)

define <4 x i16> @vp_ctpop_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i16> %v
}

define <4 x i16> @vp_ctpop_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i16> %v
}

declare <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16>, <8 x i1>, i32)

define <8 x i16> @vp_ctpop_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

define <8 x i16> @vp_ctpop_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %v
}

declare <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16>, <16 x i1>, i32)

define <16 x i16> @vp_ctpop_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i16> %v
}

define <16 x i16> @vp_ctpop_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i16> %v
}
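
; For e32 the full 32-bit patterns are used (lui 349525 + addi 1365 is
; 0x55555555, lui 209715 + addi 819 is 0x33333333, and so on), and the
; multiply by 0x01010101 collects the total into the top byte, hence the
; final shift by 24.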

declare <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32>, <2 x i1>, i32)

define <2 x i32> @vp_ctpop_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i32> %v
}

define <2 x i32> @vp_ctpop_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i32> %v
}

declare <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32>, <4 x i1>, i32)

define <4 x i32> @vp_ctpop_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %v
}

define <4 x i32> @vp_ctpop_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i32> %v
}

declare <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32>, <8 x i1>, i32)

define <8 x i32> @vp_ctpop_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vp_ctpop_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i32> %v
}

declare <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32>, <16 x i1>, i32)

define <16 x i32> @vp_ctpop_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i32> %v
}

define <16 x i32> @vp_ctpop_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i32> %v
}
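
; For e64 the two targets diverge: RV64 builds each 64-bit mask in a scalar
; register (lui/addiw, then slli/add to duplicate the low 32 bits into the
; high half) and keeps using .vx forms, while RV32 has no 64-bit scalar
; registers, so it splats the 32-bit pattern across the vector with vmv.v.x
; under a temporary e32 vsetivli and switches to .vv forms.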

declare <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64>, <2 x i1>, i32)

define <2 x i64> @vp_ctpop_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v10, v8, v9, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %v
}

define <2 x i64> @vp_ctpop_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v2i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v10, v8, v9
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v9
; RV32-NEXT:    vadd.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v9
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vmul.vv v8, v8, v9
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v2i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v9, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i64> %v
}

declare <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64>, <4 x i1>, i32)

define <4 x i64> @vp_ctpop_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v12, v8, v10, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i64> %v
}

define <4 x i64> @vp_ctpop_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v4i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v10, v10, v12
; RV32-NEXT:    vsub.vv v8, v8, v10
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v12, v8, v10
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v10
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v4i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsub.vv v8, v8, v10
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v10, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v10
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %v
}

declare <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64>, <8 x i1>, i32)

define <8 x i64> @vp_ctpop_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vmul.vv v8, v8, v12, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vp_ctpop_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v8i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v16, v8, v12
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v12
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v8i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v12, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i64> %v
}
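
; At LMUL=8 (v15i64 and up) the RV32 lowering no longer has enough vector
; registers for the source, the temporaries, and four splatted mask vectors,
; so it writes each 64-bit mask to the stack as two identical words and loads
; it with a zero-stride vlse64.v, spilling and reloading whole m8 groups
; (vs8r.v/vl8r.v) around the computation.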

declare <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64>, <15 x i1>, i32)

define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v15i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 24
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 36(sp)
; RV32-NEXT:    sw a1, 32(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 32
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v24, v16, v24, v0.t
; RV32-NEXT:    vsub.vv v8, v8, v24, v0.t
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v24, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 24
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v15i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64> %va, <15 x i1> %m, i32 %evl)
  ret <15 x i64> %v
}

define <15 x i64> @vp_ctpop_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v15i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 4(sp)
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    vand.vv v16, v0, v16
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    vand.vv v16, v8, v24
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v0
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v15i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v16, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64> %va, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x i64> %v
}

declare <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64>, <16 x i1>, i32)

define <16 x i64> @vp_ctpop_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 24
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 36(sp)
; RV32-NEXT:    sw a1, 32(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 32
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v24, v16, v24, v0.t
; RV32-NEXT:    vsub.vv v8, v8, v24, v0.t
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v24, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 24
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i64> %v
}

define <16 x i64> @vp_ctpop_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v16i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 4(sp)
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    vand.vv v16, v0, v16
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    vand.vv v16, v8, v24
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v0
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v16i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v16, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    slli a1, a0, 32
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i64> %v
}
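
; v32i64 does not fit in a single m8 register group, so the operation is split
; in two: the mask is split with vslidedown.vi v7, v0, 2, the EVL for the
; first half is clamped to 16 (the bltu/li sequence), and the remainder
; (evl - 16, computed with addi/sltu) drives the second half.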
1551 declare <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64>, <32 x i1>, i32)
1553 define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
1554 ; RV32-LABEL: vp_ctpop_v32i64:
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: .cfi_def_cfa_offset 48
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 48
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 40
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v7, v0, 2
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: li a2, 16
; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: mv a1, a0
; RV32-NEXT: bltu a0, a2, .LBB34_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB34_2:
; RV32-NEXT: addi a2, sp, 40
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a2), zero
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 5
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: addi a2, sp, 32
; RV32-NEXT: vlse64.v v16, (a2), zero
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 24
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 40
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 5
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v24, v24, v8, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 40
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vv v24, v8, v24, v0.t
; RV32-NEXT: vand.vv v8, v24, v16, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 40
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 40
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: addi a2, sp, 24
; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a2), zero
; RV32-NEXT: addi a2, sp, 48
; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vlse64.v v8, (a3), zero
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 40
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 40
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vmul.vv v8, v16, v8, v0.t
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: addi a2, a0, -16
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: vmv1r.v v0, v7
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 4
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v24, v8, v0.t
; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a2, 24
; RV32-NEXT: mul a0, a0, a2
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a2, 24
; RV32-NEXT: mul a0, a0, a2
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
; RV32-NEXT: addi a0, sp, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a2, 40
; RV32-NEXT: mul a0, a0, a2
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 48
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctpop_v32i64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 4
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: li a2, 16
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v24, v0, 2
; RV64-NEXT: mv a1, a0
; RV64-NEXT: bltu a0, a2, .LBB34_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB34_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: lui a1, 349525
; RV64-NEXT: addiw a1, a1, 1365
; RV64-NEXT: slli a2, a1, 32
; RV64-NEXT: add a1, a1, a2
; RV64-NEXT: vand.vx v16, v16, a1, v0.t
; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
; RV64-NEXT: lui a2, 209715
; RV64-NEXT: addiw a2, a2, 819
; RV64-NEXT: slli a3, a2, 32
; RV64-NEXT: add a2, a2, a3
; RV64-NEXT: vand.vx v16, v8, a2, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
; RV64-NEXT: lui a3, 61681
; RV64-NEXT: addiw a3, a3, -241
; RV64-NEXT: slli a4, a3, 32
; RV64-NEXT: add a3, a3, a4
; RV64-NEXT: vand.vx v8, v8, a3, v0.t
; RV64-NEXT: lui a4, 4112
; RV64-NEXT: addiw a4, a4, 257
; RV64-NEXT: slli a5, a4, 32
; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vmul.vx v8, v8, a4, v0.t
; RV64-NEXT: li a5, 56
; RV64-NEXT: vsrl.vx v8, v8, a5, v0.t
; RV64-NEXT: addi a6, sp, 16
; RV64-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill
; RV64-NEXT: addi a6, a0, -16
; RV64-NEXT: sltu a0, a0, a6
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: and a0, a0, a6
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: csrr a6, vlenb
; RV64-NEXT: slli a6, a6, 3
; RV64-NEXT: add a6, sp, a6
; RV64-NEXT: addi a6, a6, 16
; RV64-NEXT: vl8r.v v8, (a6) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: vand.vx v16, v16, a1, v0.t
; RV64-NEXT: vsub.vv v16, v8, v16, v0.t
; RV64-NEXT: vand.vx v8, v16, a2, v0.t
; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t
; RV64-NEXT: vand.vx v16, v16, a2, v0.t
; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
; RV64-NEXT: vand.vx v8, v8, a3, v0.t
; RV64-NEXT: vmul.vx v8, v8, a4, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
  %v = call <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl)
  ret <32 x i64> %v
}

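; A <32 x i64> operand does not fit in a single m8 register group, so the
; lowering splits the work at 16 elements: the first half runs under vsetvli
; with min(evl, 16) and the second under max(evl - 16, 0). The branchless
; remainder computation (the addi/sltu/addi/and run above) corresponds to
; this hedged C sketch:
;
;   unsigned rem = evl - 16;          // addi a2, a0, -16 (may wrap)
;   unsigned m   = (evl < rem) - 1;   // sltu + addi: all-ones unless it wrapped
;   unsigned hi  = m & rem;           // 0 when evl < 16, else evl - 16
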
define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v32i64_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: .cfi_def_cfa_offset 48
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: li a2, 16
; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: mv a1, a0
; RV32-NEXT: bltu a0, a2, .LBB35_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB35_2:
; RV32-NEXT: addi a2, sp, 40
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a2), zero
; RV32-NEXT: addi a2, sp, 32
; RV32-NEXT: vlse64.v v24, (a2), zero
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v8, 1
; RV32-NEXT: vand.vv v0, v0, v16
; RV32-NEXT: vsub.vv v8, v8, v0
; RV32-NEXT: vand.vv v0, v8, v24
; RV32-NEXT: vsrl.vi v8, v8, 2
; RV32-NEXT: vand.vv v8, v8, v24
; RV32-NEXT: vadd.vv v8, v0, v8
; RV32-NEXT: vsrl.vi v0, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v0
; RV32-NEXT: addi a2, sp, 48
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: addi a2, a0, -16
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v8, 1
; RV32-NEXT: vand.vv v16, v0, v16
; RV32-NEXT: addi a2, sp, 24
; RV32-NEXT: vsub.vv v16, v8, v16
; RV32-NEXT: vand.vv v0, v16, v24
; RV32-NEXT: vsrl.vi v16, v16, 2
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a2), zero
; RV32-NEXT: addi a2, sp, 16
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vadd.vv v16, v0, v16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v0, (a2), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v8, v16, 4
; RV32-NEXT: vadd.vv v8, v16, v8
; RV32-NEXT: addi a2, sp, 48
; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v8, v8, v24
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vmul.vv v16, v16, v0
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmul.vv v24, v8, v0
; RV32-NEXT: li a2, 56
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vx v8, v16, a2
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vx v16, v24, a2
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctpop_v32i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 16
; RV64-NEXT: mv a1, a0
; RV64-NEXT: bltu a0, a2, .LBB35_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB35_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 1
; RV64-NEXT: lui a1, 349525
; RV64-NEXT: addiw a1, a1, 1365
; RV64-NEXT: slli a2, a1, 32
; RV64-NEXT: add a1, a1, a2
; RV64-NEXT: vand.vx v24, v24, a1
; RV64-NEXT: vsub.vv v8, v8, v24
; RV64-NEXT: lui a2, 209715
; RV64-NEXT: addiw a2, a2, 819
; RV64-NEXT: slli a3, a2, 32
; RV64-NEXT: add a2, a2, a3
; RV64-NEXT: vand.vx v24, v8, a2
; RV64-NEXT: vsrl.vi v8, v8, 2
; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vadd.vv v8, v24, v8
; RV64-NEXT: vsrl.vi v24, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v24
; RV64-NEXT: lui a3, 61681
; RV64-NEXT: addiw a3, a3, -241
; RV64-NEXT: slli a4, a3, 32
; RV64-NEXT: add a3, a3, a4
; RV64-NEXT: vand.vx v8, v8, a3
; RV64-NEXT: lui a4, 4112
; RV64-NEXT: addiw a4, a4, 257
; RV64-NEXT: slli a5, a4, 32
; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vmul.vx v8, v8, a4
; RV64-NEXT: li a5, 56
; RV64-NEXT: vsrl.vx v8, v8, a5
; RV64-NEXT: addi a6, a0, -16
; RV64-NEXT: sltu a0, a0, a6
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: and a0, a0, a6
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 1
; RV64-NEXT: vand.vx v24, v24, a1
; RV64-NEXT: vsub.vv v16, v16, v24
; RV64-NEXT: vand.vx v24, v16, a2
; RV64-NEXT: vsrl.vi v16, v16, 2
; RV64-NEXT: vand.vx v16, v16, a2
; RV64-NEXT: vadd.vv v16, v24, v16
; RV64-NEXT: vsrl.vi v24, v16, 4
; RV64-NEXT: vadd.vv v16, v16, v24
; RV64-NEXT: vand.vx v16, v16, a3
; RV64-NEXT: vmul.vx v16, v16, a4
; RV64-NEXT: vsrl.vx v16, v16, a5
; RV64-NEXT: ret
  %v = call <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64> %va, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x i64> %v
}
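
; Note on the RV32 sequences above: without 64-bit scalar registers the
; SEW=64 mask constants cannot be supplied via vand.vx, so each 32-bit
; pattern is stored twice on the stack and broadcast with a zero-stride
; load, roughly (hedged sketch, mirroring the sw/vlse64 pairs above):
;
;   sw a1, 44(sp)                 ; high word of the i64 pattern
;   sw a1, 40(sp)                 ; low word (same 32-bit value)
;   addi a2, sp, 40
;   vlse64.v v16, (a2), zero      ; stride 0: splat that i64 across all lanes
;
; This is why the RV32 versions need a stack frame and extra vs8r/vl8r
; spills, while RV64 keeps the masks in scalar registers.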