1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
4 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
7 declare <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8>, <2 x i1>, i32)
9 define <2 x i8> @vp_ctpop_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
10 ; CHECK-LABEL: vp_ctpop_v2i8:
12 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
13 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
14 ; CHECK-NEXT: li a0, 85
15 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
16 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
17 ; CHECK-NEXT: li a0, 51
18 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
19 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
20 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
21 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
22 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
23 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
24 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
26 %v = call <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8> %va, <2 x i1> %m, i32 %evl)
30 define <2 x i8> @vp_ctpop_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
31 ; CHECK-LABEL: vp_ctpop_v2i8_unmasked:
33 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
34 ; CHECK-NEXT: vsrl.vi v9, v8, 1
35 ; CHECK-NEXT: li a0, 85
36 ; CHECK-NEXT: vand.vx v9, v9, a0
37 ; CHECK-NEXT: vsub.vv v8, v8, v9
38 ; CHECK-NEXT: li a0, 51
39 ; CHECK-NEXT: vand.vx v9, v8, a0
40 ; CHECK-NEXT: vsrl.vi v8, v8, 2
41 ; CHECK-NEXT: vand.vx v8, v8, a0
42 ; CHECK-NEXT: vadd.vv v8, v9, v8
43 ; CHECK-NEXT: vsrl.vi v9, v8, 4
44 ; CHECK-NEXT: vadd.vv v8, v8, v9
45 ; CHECK-NEXT: vand.vi v8, v8, 15
47 %head = insertelement <2 x i1> poison, i1 true, i32 0
48 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
49 %v = call <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8> %va, <2 x i1> %m, i32 %evl)
53 declare <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8>, <4 x i1>, i32)
55 define <4 x i8> @vp_ctpop_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
56 ; CHECK-LABEL: vp_ctpop_v4i8:
58 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
59 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
60 ; CHECK-NEXT: li a0, 85
61 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
62 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
63 ; CHECK-NEXT: li a0, 51
64 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
65 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
66 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
67 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
68 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
69 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
70 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
72 %v = call <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl)
76 define <4 x i8> @vp_ctpop_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
77 ; CHECK-LABEL: vp_ctpop_v4i8_unmasked:
79 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
80 ; CHECK-NEXT: vsrl.vi v9, v8, 1
81 ; CHECK-NEXT: li a0, 85
82 ; CHECK-NEXT: vand.vx v9, v9, a0
83 ; CHECK-NEXT: vsub.vv v8, v8, v9
84 ; CHECK-NEXT: li a0, 51
85 ; CHECK-NEXT: vand.vx v9, v8, a0
86 ; CHECK-NEXT: vsrl.vi v8, v8, 2
87 ; CHECK-NEXT: vand.vx v8, v8, a0
88 ; CHECK-NEXT: vadd.vv v8, v9, v8
89 ; CHECK-NEXT: vsrl.vi v9, v8, 4
90 ; CHECK-NEXT: vadd.vv v8, v8, v9
91 ; CHECK-NEXT: vand.vi v8, v8, 15
93 %head = insertelement <4 x i1> poison, i1 true, i32 0
94 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
95 %v = call <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl)
99 declare <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8>, <8 x i1>, i32)
101 define <8 x i8> @vp_ctpop_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
102 ; CHECK-LABEL: vp_ctpop_v8i8:
104 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
105 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
106 ; CHECK-NEXT: li a0, 85
107 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
108 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
109 ; CHECK-NEXT: li a0, 51
110 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
111 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
112 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
113 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
114 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
115 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
116 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
118 %v = call <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8> %va, <8 x i1> %m, i32 %evl)
122 define <8 x i8> @vp_ctpop_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
123 ; CHECK-LABEL: vp_ctpop_v8i8_unmasked:
125 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
126 ; CHECK-NEXT: vsrl.vi v9, v8, 1
127 ; CHECK-NEXT: li a0, 85
128 ; CHECK-NEXT: vand.vx v9, v9, a0
129 ; CHECK-NEXT: vsub.vv v8, v8, v9
130 ; CHECK-NEXT: li a0, 51
131 ; CHECK-NEXT: vand.vx v9, v8, a0
132 ; CHECK-NEXT: vsrl.vi v8, v8, 2
133 ; CHECK-NEXT: vand.vx v8, v8, a0
134 ; CHECK-NEXT: vadd.vv v8, v9, v8
135 ; CHECK-NEXT: vsrl.vi v9, v8, 4
136 ; CHECK-NEXT: vadd.vv v8, v8, v9
137 ; CHECK-NEXT: vand.vi v8, v8, 15
139 %head = insertelement <8 x i1> poison, i1 true, i32 0
140 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
141 %v = call <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8> %va, <8 x i1> %m, i32 %evl)
145 declare <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8>, <16 x i1>, i32)
147 define <16 x i8> @vp_ctpop_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
148 ; CHECK-LABEL: vp_ctpop_v16i8:
150 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
151 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
152 ; CHECK-NEXT: li a0, 85
153 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
154 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
155 ; CHECK-NEXT: li a0, 51
156 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
157 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
158 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
159 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
160 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
161 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
162 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
164 %v = call <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8> %va, <16 x i1> %m, i32 %evl)
168 define <16 x i8> @vp_ctpop_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
169 ; CHECK-LABEL: vp_ctpop_v16i8_unmasked:
171 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
172 ; CHECK-NEXT: vsrl.vi v9, v8, 1
173 ; CHECK-NEXT: li a0, 85
174 ; CHECK-NEXT: vand.vx v9, v9, a0
175 ; CHECK-NEXT: vsub.vv v8, v8, v9
176 ; CHECK-NEXT: li a0, 51
177 ; CHECK-NEXT: vand.vx v9, v8, a0
178 ; CHECK-NEXT: vsrl.vi v8, v8, 2
179 ; CHECK-NEXT: vand.vx v8, v8, a0
180 ; CHECK-NEXT: vadd.vv v8, v9, v8
181 ; CHECK-NEXT: vsrl.vi v9, v8, 4
182 ; CHECK-NEXT: vadd.vv v8, v8, v9
183 ; CHECK-NEXT: vand.vi v8, v8, 15
185 %head = insertelement <16 x i1> poison, i1 true, i32 0
186 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
187 %v = call <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8> %va, <16 x i1> %m, i32 %evl)
191 declare <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16>, <2 x i1>, i32)
193 define <2 x i16> @vp_ctpop_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
194 ; CHECK-LABEL: vp_ctpop_v2i16:
196 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
197 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
198 ; CHECK-NEXT: lui a0, 5
199 ; CHECK-NEXT: addi a0, a0, 1365
200 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
201 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
202 ; CHECK-NEXT: lui a0, 3
203 ; CHECK-NEXT: addi a0, a0, 819
204 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
205 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
206 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
207 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
208 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
209 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
210 ; CHECK-NEXT: lui a0, 1
211 ; CHECK-NEXT: addi a0, a0, -241
212 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
213 ; CHECK-NEXT: li a0, 257
214 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
215 ; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
217 %v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl)
221 define <2 x i16> @vp_ctpop_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
222 ; CHECK-LABEL: vp_ctpop_v2i16_unmasked:
224 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
225 ; CHECK-NEXT: vsrl.vi v9, v8, 1
226 ; CHECK-NEXT: lui a0, 5
227 ; CHECK-NEXT: addi a0, a0, 1365
228 ; CHECK-NEXT: vand.vx v9, v9, a0
229 ; CHECK-NEXT: vsub.vv v8, v8, v9
230 ; CHECK-NEXT: lui a0, 3
231 ; CHECK-NEXT: addi a0, a0, 819
232 ; CHECK-NEXT: vand.vx v9, v8, a0
233 ; CHECK-NEXT: vsrl.vi v8, v8, 2
234 ; CHECK-NEXT: vand.vx v8, v8, a0
235 ; CHECK-NEXT: vadd.vv v8, v9, v8
236 ; CHECK-NEXT: vsrl.vi v9, v8, 4
237 ; CHECK-NEXT: vadd.vv v8, v8, v9
238 ; CHECK-NEXT: lui a0, 1
239 ; CHECK-NEXT: addi a0, a0, -241
240 ; CHECK-NEXT: vand.vx v8, v8, a0
241 ; CHECK-NEXT: li a0, 257
242 ; CHECK-NEXT: vmul.vx v8, v8, a0
243 ; CHECK-NEXT: vsrl.vi v8, v8, 8
245 %head = insertelement <2 x i1> poison, i1 true, i32 0
246 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
247 %v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl)
251 declare <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16>, <4 x i1>, i32)
253 define <4 x i16> @vp_ctpop_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
254 ; CHECK-LABEL: vp_ctpop_v4i16:
256 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
257 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
258 ; CHECK-NEXT: lui a0, 5
259 ; CHECK-NEXT: addi a0, a0, 1365
260 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
261 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
262 ; CHECK-NEXT: lui a0, 3
263 ; CHECK-NEXT: addi a0, a0, 819
264 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
265 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
266 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
267 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
268 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
269 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
270 ; CHECK-NEXT: lui a0, 1
271 ; CHECK-NEXT: addi a0, a0, -241
272 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
273 ; CHECK-NEXT: li a0, 257
274 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
275 ; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
277 %v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
281 define <4 x i16> @vp_ctpop_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
282 ; CHECK-LABEL: vp_ctpop_v4i16_unmasked:
284 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
285 ; CHECK-NEXT: vsrl.vi v9, v8, 1
286 ; CHECK-NEXT: lui a0, 5
287 ; CHECK-NEXT: addi a0, a0, 1365
288 ; CHECK-NEXT: vand.vx v9, v9, a0
289 ; CHECK-NEXT: vsub.vv v8, v8, v9
290 ; CHECK-NEXT: lui a0, 3
291 ; CHECK-NEXT: addi a0, a0, 819
292 ; CHECK-NEXT: vand.vx v9, v8, a0
293 ; CHECK-NEXT: vsrl.vi v8, v8, 2
294 ; CHECK-NEXT: vand.vx v8, v8, a0
295 ; CHECK-NEXT: vadd.vv v8, v9, v8
296 ; CHECK-NEXT: vsrl.vi v9, v8, 4
297 ; CHECK-NEXT: vadd.vv v8, v8, v9
298 ; CHECK-NEXT: lui a0, 1
299 ; CHECK-NEXT: addi a0, a0, -241
300 ; CHECK-NEXT: vand.vx v8, v8, a0
301 ; CHECK-NEXT: li a0, 257
302 ; CHECK-NEXT: vmul.vx v8, v8, a0
303 ; CHECK-NEXT: vsrl.vi v8, v8, 8
305 %head = insertelement <4 x i1> poison, i1 true, i32 0
306 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
307 %v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
311 declare <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16>, <8 x i1>, i32)
313 define <8 x i16> @vp_ctpop_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
314 ; CHECK-LABEL: vp_ctpop_v8i16:
316 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
317 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
318 ; CHECK-NEXT: lui a0, 5
319 ; CHECK-NEXT: addi a0, a0, 1365
320 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
321 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
322 ; CHECK-NEXT: lui a0, 3
323 ; CHECK-NEXT: addi a0, a0, 819
324 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
325 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
326 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
327 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
328 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
329 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
330 ; CHECK-NEXT: lui a0, 1
331 ; CHECK-NEXT: addi a0, a0, -241
332 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
333 ; CHECK-NEXT: li a0, 257
334 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
335 ; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
337 %v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl)
341 define <8 x i16> @vp_ctpop_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
342 ; CHECK-LABEL: vp_ctpop_v8i16_unmasked:
344 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
345 ; CHECK-NEXT: vsrl.vi v9, v8, 1
346 ; CHECK-NEXT: lui a0, 5
347 ; CHECK-NEXT: addi a0, a0, 1365
348 ; CHECK-NEXT: vand.vx v9, v9, a0
349 ; CHECK-NEXT: vsub.vv v8, v8, v9
350 ; CHECK-NEXT: lui a0, 3
351 ; CHECK-NEXT: addi a0, a0, 819
352 ; CHECK-NEXT: vand.vx v9, v8, a0
353 ; CHECK-NEXT: vsrl.vi v8, v8, 2
354 ; CHECK-NEXT: vand.vx v8, v8, a0
355 ; CHECK-NEXT: vadd.vv v8, v9, v8
356 ; CHECK-NEXT: vsrl.vi v9, v8, 4
357 ; CHECK-NEXT: vadd.vv v8, v8, v9
358 ; CHECK-NEXT: lui a0, 1
359 ; CHECK-NEXT: addi a0, a0, -241
360 ; CHECK-NEXT: vand.vx v8, v8, a0
361 ; CHECK-NEXT: li a0, 257
362 ; CHECK-NEXT: vmul.vx v8, v8, a0
363 ; CHECK-NEXT: vsrl.vi v8, v8, 8
365 %head = insertelement <8 x i1> poison, i1 true, i32 0
366 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
367 %v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl)
371 declare <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16>, <16 x i1>, i32)
373 define <16 x i16> @vp_ctpop_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
374 ; CHECK-LABEL: vp_ctpop_v16i16:
376 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
377 ; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
378 ; CHECK-NEXT: lui a0, 5
379 ; CHECK-NEXT: addi a0, a0, 1365
380 ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
381 ; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
382 ; CHECK-NEXT: lui a0, 3
383 ; CHECK-NEXT: addi a0, a0, 819
384 ; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
385 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
386 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
387 ; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
388 ; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
389 ; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
390 ; CHECK-NEXT: lui a0, 1
391 ; CHECK-NEXT: addi a0, a0, -241
392 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
393 ; CHECK-NEXT: li a0, 257
394 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
395 ; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
397 %v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl)
401 define <16 x i16> @vp_ctpop_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
402 ; CHECK-LABEL: vp_ctpop_v16i16_unmasked:
404 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
405 ; CHECK-NEXT: vsrl.vi v10, v8, 1
406 ; CHECK-NEXT: lui a0, 5
407 ; CHECK-NEXT: addi a0, a0, 1365
408 ; CHECK-NEXT: vand.vx v10, v10, a0
409 ; CHECK-NEXT: vsub.vv v8, v8, v10
410 ; CHECK-NEXT: lui a0, 3
411 ; CHECK-NEXT: addi a0, a0, 819
412 ; CHECK-NEXT: vand.vx v10, v8, a0
413 ; CHECK-NEXT: vsrl.vi v8, v8, 2
414 ; CHECK-NEXT: vand.vx v8, v8, a0
415 ; CHECK-NEXT: vadd.vv v8, v10, v8
416 ; CHECK-NEXT: vsrl.vi v10, v8, 4
417 ; CHECK-NEXT: vadd.vv v8, v8, v10
418 ; CHECK-NEXT: lui a0, 1
419 ; CHECK-NEXT: addi a0, a0, -241
420 ; CHECK-NEXT: vand.vx v8, v8, a0
421 ; CHECK-NEXT: li a0, 257
422 ; CHECK-NEXT: vmul.vx v8, v8, a0
423 ; CHECK-NEXT: vsrl.vi v8, v8, 8
425 %head = insertelement <16 x i1> poison, i1 true, i32 0
426 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
427 %v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl)
431 declare <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32>, <2 x i1>, i32)
433 define <2 x i32> @vp_ctpop_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
434 ; CHECK-LABEL: vp_ctpop_v2i32:
436 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
437 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
438 ; CHECK-NEXT: lui a0, 349525
439 ; CHECK-NEXT: addi a0, a0, 1365
440 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
441 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
442 ; CHECK-NEXT: lui a0, 209715
443 ; CHECK-NEXT: addi a0, a0, 819
444 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
445 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
446 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
447 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
448 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
449 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
450 ; CHECK-NEXT: lui a0, 61681
451 ; CHECK-NEXT: addi a0, a0, -241
452 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
453 ; CHECK-NEXT: lui a0, 4112
454 ; CHECK-NEXT: addi a0, a0, 257
455 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
456 ; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
458 %v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
462 define <2 x i32> @vp_ctpop_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
463 ; CHECK-LABEL: vp_ctpop_v2i32_unmasked:
465 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
466 ; CHECK-NEXT: vsrl.vi v9, v8, 1
467 ; CHECK-NEXT: lui a0, 349525
468 ; CHECK-NEXT: addi a0, a0, 1365
469 ; CHECK-NEXT: vand.vx v9, v9, a0
470 ; CHECK-NEXT: vsub.vv v8, v8, v9
471 ; CHECK-NEXT: lui a0, 209715
472 ; CHECK-NEXT: addi a0, a0, 819
473 ; CHECK-NEXT: vand.vx v9, v8, a0
474 ; CHECK-NEXT: vsrl.vi v8, v8, 2
475 ; CHECK-NEXT: vand.vx v8, v8, a0
476 ; CHECK-NEXT: vadd.vv v8, v9, v8
477 ; CHECK-NEXT: vsrl.vi v9, v8, 4
478 ; CHECK-NEXT: vadd.vv v8, v8, v9
479 ; CHECK-NEXT: lui a0, 61681
480 ; CHECK-NEXT: addi a0, a0, -241
481 ; CHECK-NEXT: vand.vx v8, v8, a0
482 ; CHECK-NEXT: lui a0, 4112
483 ; CHECK-NEXT: addi a0, a0, 257
484 ; CHECK-NEXT: vmul.vx v8, v8, a0
485 ; CHECK-NEXT: vsrl.vi v8, v8, 24
487 %head = insertelement <2 x i1> poison, i1 true, i32 0
488 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
489 %v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
493 declare <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32>, <4 x i1>, i32)
495 define <4 x i32> @vp_ctpop_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
496 ; CHECK-LABEL: vp_ctpop_v4i32:
498 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
499 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
500 ; CHECK-NEXT: lui a0, 349525
501 ; CHECK-NEXT: addi a0, a0, 1365
502 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
503 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
504 ; CHECK-NEXT: lui a0, 209715
505 ; CHECK-NEXT: addi a0, a0, 819
506 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
507 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
508 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
509 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
510 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
511 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
512 ; CHECK-NEXT: lui a0, 61681
513 ; CHECK-NEXT: addi a0, a0, -241
514 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
515 ; CHECK-NEXT: lui a0, 4112
516 ; CHECK-NEXT: addi a0, a0, 257
517 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
518 ; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
520 %v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
524 define <4 x i32> @vp_ctpop_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
525 ; CHECK-LABEL: vp_ctpop_v4i32_unmasked:
527 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
528 ; CHECK-NEXT: vsrl.vi v9, v8, 1
529 ; CHECK-NEXT: lui a0, 349525
530 ; CHECK-NEXT: addi a0, a0, 1365
531 ; CHECK-NEXT: vand.vx v9, v9, a0
532 ; CHECK-NEXT: vsub.vv v8, v8, v9
533 ; CHECK-NEXT: lui a0, 209715
534 ; CHECK-NEXT: addi a0, a0, 819
535 ; CHECK-NEXT: vand.vx v9, v8, a0
536 ; CHECK-NEXT: vsrl.vi v8, v8, 2
537 ; CHECK-NEXT: vand.vx v8, v8, a0
538 ; CHECK-NEXT: vadd.vv v8, v9, v8
539 ; CHECK-NEXT: vsrl.vi v9, v8, 4
540 ; CHECK-NEXT: vadd.vv v8, v8, v9
541 ; CHECK-NEXT: lui a0, 61681
542 ; CHECK-NEXT: addi a0, a0, -241
543 ; CHECK-NEXT: vand.vx v8, v8, a0
544 ; CHECK-NEXT: lui a0, 4112
545 ; CHECK-NEXT: addi a0, a0, 257
546 ; CHECK-NEXT: vmul.vx v8, v8, a0
547 ; CHECK-NEXT: vsrl.vi v8, v8, 24
549 %head = insertelement <4 x i1> poison, i1 true, i32 0
550 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
551 %v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
555 declare <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32>, <8 x i1>, i32)
557 define <8 x i32> @vp_ctpop_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
558 ; CHECK-LABEL: vp_ctpop_v8i32:
560 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
561 ; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
562 ; CHECK-NEXT: lui a0, 349525
563 ; CHECK-NEXT: addi a0, a0, 1365
564 ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
565 ; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
566 ; CHECK-NEXT: lui a0, 209715
567 ; CHECK-NEXT: addi a0, a0, 819
568 ; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
569 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
570 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
571 ; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
572 ; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
573 ; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
574 ; CHECK-NEXT: lui a0, 61681
575 ; CHECK-NEXT: addi a0, a0, -241
576 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
577 ; CHECK-NEXT: lui a0, 4112
578 ; CHECK-NEXT: addi a0, a0, 257
579 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
580 ; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
582 %v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
586 define <8 x i32> @vp_ctpop_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
587 ; CHECK-LABEL: vp_ctpop_v8i32_unmasked:
589 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
590 ; CHECK-NEXT: vsrl.vi v10, v8, 1
591 ; CHECK-NEXT: lui a0, 349525
592 ; CHECK-NEXT: addi a0, a0, 1365
593 ; CHECK-NEXT: vand.vx v10, v10, a0
594 ; CHECK-NEXT: vsub.vv v8, v8, v10
595 ; CHECK-NEXT: lui a0, 209715
596 ; CHECK-NEXT: addi a0, a0, 819
597 ; CHECK-NEXT: vand.vx v10, v8, a0
598 ; CHECK-NEXT: vsrl.vi v8, v8, 2
599 ; CHECK-NEXT: vand.vx v8, v8, a0
600 ; CHECK-NEXT: vadd.vv v8, v10, v8
601 ; CHECK-NEXT: vsrl.vi v10, v8, 4
602 ; CHECK-NEXT: vadd.vv v8, v8, v10
603 ; CHECK-NEXT: lui a0, 61681
604 ; CHECK-NEXT: addi a0, a0, -241
605 ; CHECK-NEXT: vand.vx v8, v8, a0
606 ; CHECK-NEXT: lui a0, 4112
607 ; CHECK-NEXT: addi a0, a0, 257
608 ; CHECK-NEXT: vmul.vx v8, v8, a0
609 ; CHECK-NEXT: vsrl.vi v8, v8, 24
611 %head = insertelement <8 x i1> poison, i1 true, i32 0
612 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
613 %v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
617 declare <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32>, <16 x i1>, i32)
619 define <16 x i32> @vp_ctpop_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
620 ; CHECK-LABEL: vp_ctpop_v16i32:
622 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
623 ; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
624 ; CHECK-NEXT: lui a0, 349525
625 ; CHECK-NEXT: addi a0, a0, 1365
626 ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
627 ; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
628 ; CHECK-NEXT: lui a0, 209715
629 ; CHECK-NEXT: addi a0, a0, 819
630 ; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
631 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
632 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
633 ; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t
634 ; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
635 ; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
636 ; CHECK-NEXT: lui a0, 61681
637 ; CHECK-NEXT: addi a0, a0, -241
638 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
639 ; CHECK-NEXT: lui a0, 4112
640 ; CHECK-NEXT: addi a0, a0, 257
641 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
642 ; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
644 %v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
648 define <16 x i32> @vp_ctpop_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
649 ; CHECK-LABEL: vp_ctpop_v16i32_unmasked:
651 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
652 ; CHECK-NEXT: vsrl.vi v12, v8, 1
653 ; CHECK-NEXT: lui a0, 349525
654 ; CHECK-NEXT: addi a0, a0, 1365
655 ; CHECK-NEXT: vand.vx v12, v12, a0
656 ; CHECK-NEXT: vsub.vv v8, v8, v12
657 ; CHECK-NEXT: lui a0, 209715
658 ; CHECK-NEXT: addi a0, a0, 819
659 ; CHECK-NEXT: vand.vx v12, v8, a0
660 ; CHECK-NEXT: vsrl.vi v8, v8, 2
661 ; CHECK-NEXT: vand.vx v8, v8, a0
662 ; CHECK-NEXT: vadd.vv v8, v12, v8
663 ; CHECK-NEXT: vsrl.vi v12, v8, 4
664 ; CHECK-NEXT: vadd.vv v8, v8, v12
665 ; CHECK-NEXT: lui a0, 61681
666 ; CHECK-NEXT: addi a0, a0, -241
667 ; CHECK-NEXT: vand.vx v8, v8, a0
668 ; CHECK-NEXT: lui a0, 4112
669 ; CHECK-NEXT: addi a0, a0, 257
670 ; CHECK-NEXT: vmul.vx v8, v8, a0
671 ; CHECK-NEXT: vsrl.vi v8, v8, 24
673 %head = insertelement <16 x i1> poison, i1 true, i32 0
674 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
675 %v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
679 declare <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64>, <2 x i1>, i32)
681 define <2 x i64> @vp_ctpop_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
682 ; RV32-LABEL: vp_ctpop_v2i64:
684 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
685 ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
686 ; RV32-NEXT: lui a1, 349525
687 ; RV32-NEXT: addi a1, a1, 1365
688 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
689 ; RV32-NEXT: vmv.v.x v10, a1
690 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
691 ; RV32-NEXT: vand.vv v9, v9, v10, v0.t
692 ; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
693 ; RV32-NEXT: lui a1, 209715
694 ; RV32-NEXT: addi a1, a1, 819
695 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
696 ; RV32-NEXT: vmv.v.x v9, a1
697 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
698 ; RV32-NEXT: vand.vv v10, v8, v9, v0.t
699 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
700 ; RV32-NEXT: vand.vv v8, v8, v9, v0.t
701 ; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
702 ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
703 ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
704 ; RV32-NEXT: lui a1, 61681
705 ; RV32-NEXT: addi a1, a1, -241
706 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
707 ; RV32-NEXT: vmv.v.x v9, a1
708 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
709 ; RV32-NEXT: vand.vv v8, v8, v9, v0.t
710 ; RV32-NEXT: lui a1, 4112
711 ; RV32-NEXT: addi a1, a1, 257
712 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
713 ; RV32-NEXT: vmv.v.x v9, a1
714 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
715 ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t
716 ; RV32-NEXT: li a0, 56
717 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
720 ; RV64-LABEL: vp_ctpop_v2i64:
722 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
723 ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
724 ; RV64-NEXT: lui a0, 349525
725 ; RV64-NEXT: addiw a0, a0, 1365
726 ; RV64-NEXT: slli a1, a0, 32
727 ; RV64-NEXT: add a0, a0, a1
728 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t
729 ; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
730 ; RV64-NEXT: lui a0, 209715
731 ; RV64-NEXT: addiw a0, a0, 819
732 ; RV64-NEXT: slli a1, a0, 32
733 ; RV64-NEXT: add a0, a0, a1
734 ; RV64-NEXT: vand.vx v9, v8, a0, v0.t
735 ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
736 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t
737 ; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
738 ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
739 ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
740 ; RV64-NEXT: lui a0, 61681
741 ; RV64-NEXT: addiw a0, a0, -241
742 ; RV64-NEXT: slli a1, a0, 32
743 ; RV64-NEXT: add a0, a0, a1
744 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t
745 ; RV64-NEXT: lui a0, 4112
746 ; RV64-NEXT: addiw a0, a0, 257
747 ; RV64-NEXT: slli a1, a0, 32
748 ; RV64-NEXT: add a0, a0, a1
749 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
750 ; RV64-NEXT: li a0, 56
751 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
753 %v = call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> %va, <2 x i1> %m, i32 %evl)
757 define <2 x i64> @vp_ctpop_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
758 ; RV32-LABEL: vp_ctpop_v2i64_unmasked:
760 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
761 ; RV32-NEXT: vsrl.vi v9, v8, 1
762 ; RV32-NEXT: lui a1, 349525
763 ; RV32-NEXT: addi a1, a1, 1365
764 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
765 ; RV32-NEXT: vmv.v.x v10, a1
766 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
767 ; RV32-NEXT: vand.vv v9, v9, v10
768 ; RV32-NEXT: vsub.vv v8, v8, v9
769 ; RV32-NEXT: lui a1, 209715
770 ; RV32-NEXT: addi a1, a1, 819
771 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
772 ; RV32-NEXT: vmv.v.x v9, a1
773 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
774 ; RV32-NEXT: vand.vv v10, v8, v9
775 ; RV32-NEXT: vsrl.vi v8, v8, 2
776 ; RV32-NEXT: vand.vv v8, v8, v9
777 ; RV32-NEXT: vadd.vv v8, v10, v8
778 ; RV32-NEXT: vsrl.vi v9, v8, 4
779 ; RV32-NEXT: vadd.vv v8, v8, v9
780 ; RV32-NEXT: lui a1, 61681
781 ; RV32-NEXT: addi a1, a1, -241
782 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
783 ; RV32-NEXT: vmv.v.x v9, a1
784 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
785 ; RV32-NEXT: vand.vv v8, v8, v9
786 ; RV32-NEXT: lui a1, 4112
787 ; RV32-NEXT: addi a1, a1, 257
788 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
789 ; RV32-NEXT: vmv.v.x v9, a1
790 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
791 ; RV32-NEXT: vmul.vv v8, v8, v9
792 ; RV32-NEXT: li a0, 56
793 ; RV32-NEXT: vsrl.vx v8, v8, a0
796 ; RV64-LABEL: vp_ctpop_v2i64_unmasked:
798 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
799 ; RV64-NEXT: vsrl.vi v9, v8, 1
800 ; RV64-NEXT: lui a0, 349525
801 ; RV64-NEXT: addiw a0, a0, 1365
802 ; RV64-NEXT: slli a1, a0, 32
803 ; RV64-NEXT: add a0, a0, a1
804 ; RV64-NEXT: vand.vx v9, v9, a0
805 ; RV64-NEXT: vsub.vv v8, v8, v9
806 ; RV64-NEXT: lui a0, 209715
807 ; RV64-NEXT: addiw a0, a0, 819
808 ; RV64-NEXT: slli a1, a0, 32
809 ; RV64-NEXT: add a0, a0, a1
810 ; RV64-NEXT: vand.vx v9, v8, a0
811 ; RV64-NEXT: vsrl.vi v8, v8, 2
812 ; RV64-NEXT: vand.vx v8, v8, a0
813 ; RV64-NEXT: vadd.vv v8, v9, v8
814 ; RV64-NEXT: vsrl.vi v9, v8, 4
815 ; RV64-NEXT: vadd.vv v8, v8, v9
816 ; RV64-NEXT: lui a0, 61681
817 ; RV64-NEXT: addiw a0, a0, -241
818 ; RV64-NEXT: slli a1, a0, 32
819 ; RV64-NEXT: add a0, a0, a1
820 ; RV64-NEXT: vand.vx v8, v8, a0
821 ; RV64-NEXT: lui a0, 4112
822 ; RV64-NEXT: addiw a0, a0, 257
823 ; RV64-NEXT: slli a1, a0, 32
824 ; RV64-NEXT: add a0, a0, a1
825 ; RV64-NEXT: vmul.vx v8, v8, a0
826 ; RV64-NEXT: li a0, 56
827 ; RV64-NEXT: vsrl.vx v8, v8, a0
829 %head = insertelement <2 x i1> poison, i1 true, i32 0
830 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
831 %v = call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> %va, <2 x i1> %m, i32 %evl)
835 declare <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64>, <4 x i1>, i32)
837 define <4 x i64> @vp_ctpop_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
838 ; RV32-LABEL: vp_ctpop_v4i64:
840 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
841 ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
842 ; RV32-NEXT: lui a1, 349525
843 ; RV32-NEXT: addi a1, a1, 1365
844 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
845 ; RV32-NEXT: vmv.v.x v12, a1
846 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
847 ; RV32-NEXT: vand.vv v10, v10, v12, v0.t
848 ; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
849 ; RV32-NEXT: lui a1, 209715
850 ; RV32-NEXT: addi a1, a1, 819
851 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
852 ; RV32-NEXT: vmv.v.x v10, a1
853 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
854 ; RV32-NEXT: vand.vv v12, v8, v10, v0.t
855 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
856 ; RV32-NEXT: vand.vv v8, v8, v10, v0.t
857 ; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
858 ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
859 ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
860 ; RV32-NEXT: lui a1, 61681
861 ; RV32-NEXT: addi a1, a1, -241
862 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
863 ; RV32-NEXT: vmv.v.x v10, a1
864 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
865 ; RV32-NEXT: vand.vv v8, v8, v10, v0.t
866 ; RV32-NEXT: lui a1, 4112
867 ; RV32-NEXT: addi a1, a1, 257
868 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
869 ; RV32-NEXT: vmv.v.x v10, a1
870 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
871 ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t
872 ; RV32-NEXT: li a0, 56
873 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
876 ; RV64-LABEL: vp_ctpop_v4i64:
878 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
879 ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
880 ; RV64-NEXT: lui a0, 349525
881 ; RV64-NEXT: addiw a0, a0, 1365
882 ; RV64-NEXT: slli a1, a0, 32
883 ; RV64-NEXT: add a0, a0, a1
884 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t
885 ; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
886 ; RV64-NEXT: lui a0, 209715
887 ; RV64-NEXT: addiw a0, a0, 819
888 ; RV64-NEXT: slli a1, a0, 32
889 ; RV64-NEXT: add a0, a0, a1
890 ; RV64-NEXT: vand.vx v10, v8, a0, v0.t
891 ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
892 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t
893 ; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
894 ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
895 ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
896 ; RV64-NEXT: lui a0, 61681
897 ; RV64-NEXT: addiw a0, a0, -241
898 ; RV64-NEXT: slli a1, a0, 32
899 ; RV64-NEXT: add a0, a0, a1
900 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t
901 ; RV64-NEXT: lui a0, 4112
902 ; RV64-NEXT: addiw a0, a0, 257
903 ; RV64-NEXT: slli a1, a0, 32
904 ; RV64-NEXT: add a0, a0, a1
905 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
906 ; RV64-NEXT: li a0, 56
907 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
909 %v = call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl)
; vp_ctpop_v4i64_unmasked -- unmasked VP popcount on <4 x i64> (e64/m2).
; The all-true mask is a splat built from insertelement+shufflevector, so
; codegen emits the classic bit-parallel popcount without v0.t predication.
; RV32 materializes each 64-bit magic constant (0x5555..., 0x3333...,
; 0x0f0f..., 0x0101...) by splatting its 32-bit half at e32 via vmv.v.x;
; RV64 builds the full 64-bit constant in a scalar reg (lui/addiw/slli/add).
913 define <4 x i64> @vp_ctpop_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
914 ; RV32-LABEL: vp_ctpop_v4i64_unmasked:
916 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
917 ; RV32-NEXT: vsrl.vi v10, v8, 1
918 ; RV32-NEXT: lui a1, 349525
919 ; RV32-NEXT: addi a1, a1, 1365
920 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
921 ; RV32-NEXT: vmv.v.x v12, a1
922 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
923 ; RV32-NEXT: vand.vv v10, v10, v12
924 ; RV32-NEXT: vsub.vv v8, v8, v10
925 ; RV32-NEXT: lui a1, 209715
926 ; RV32-NEXT: addi a1, a1, 819
927 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
928 ; RV32-NEXT: vmv.v.x v10, a1
929 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
930 ; RV32-NEXT: vand.vv v12, v8, v10
931 ; RV32-NEXT: vsrl.vi v8, v8, 2
932 ; RV32-NEXT: vand.vv v8, v8, v10
933 ; RV32-NEXT: vadd.vv v8, v12, v8
934 ; RV32-NEXT: vsrl.vi v10, v8, 4
935 ; RV32-NEXT: vadd.vv v8, v8, v10
936 ; RV32-NEXT: lui a1, 61681
937 ; RV32-NEXT: addi a1, a1, -241
938 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
939 ; RV32-NEXT: vmv.v.x v10, a1
940 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
941 ; RV32-NEXT: vand.vv v8, v8, v10
942 ; RV32-NEXT: lui a1, 4112
943 ; RV32-NEXT: addi a1, a1, 257
944 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
945 ; RV32-NEXT: vmv.v.x v10, a1
946 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
947 ; RV32-NEXT: vmul.vv v8, v8, v10
948 ; RV32-NEXT: li a0, 56
949 ; RV32-NEXT: vsrl.vx v8, v8, a0
952 ; RV64-LABEL: vp_ctpop_v4i64_unmasked:
954 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
955 ; RV64-NEXT: vsrl.vi v10, v8, 1
956 ; RV64-NEXT: lui a0, 349525
957 ; RV64-NEXT: addiw a0, a0, 1365
958 ; RV64-NEXT: slli a1, a0, 32
959 ; RV64-NEXT: add a0, a0, a1
960 ; RV64-NEXT: vand.vx v10, v10, a0
961 ; RV64-NEXT: vsub.vv v8, v8, v10
962 ; RV64-NEXT: lui a0, 209715
963 ; RV64-NEXT: addiw a0, a0, 819
964 ; RV64-NEXT: slli a1, a0, 32
965 ; RV64-NEXT: add a0, a0, a1
966 ; RV64-NEXT: vand.vx v10, v8, a0
967 ; RV64-NEXT: vsrl.vi v8, v8, 2
968 ; RV64-NEXT: vand.vx v8, v8, a0
969 ; RV64-NEXT: vadd.vv v8, v10, v8
970 ; RV64-NEXT: vsrl.vi v10, v8, 4
971 ; RV64-NEXT: vadd.vv v8, v8, v10
972 ; RV64-NEXT: lui a0, 61681
973 ; RV64-NEXT: addiw a0, a0, -241
974 ; RV64-NEXT: slli a1, a0, 32
975 ; RV64-NEXT: add a0, a0, a1
976 ; RV64-NEXT: vand.vx v8, v8, a0
977 ; RV64-NEXT: lui a0, 4112
978 ; RV64-NEXT: addiw a0, a0, 257
979 ; RV64-NEXT: slli a1, a0, 32
980 ; RV64-NEXT: add a0, a0, a1
981 ; RV64-NEXT: vmul.vx v8, v8, a0
982 ; RV64-NEXT: li a0, 56
983 ; RV64-NEXT: vsrl.vx v8, v8, a0
985 %head = insertelement <4 x i1> poison, i1 true, i32 0
986 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
987 %v = call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl)
991 declare <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64>, <8 x i1>, i32)
; vp_ctpop_v8i64 -- masked VP popcount on <8 x i64> (e64/m4); every vector
; op carries the v0.t mask. Same bit-parallel popcount as the smaller types:
; x -= (x>>1)&0x55...; x = (x&0x33...)+((x>>2)&0x33...); x = (x+(x>>4))&0x0f...;
; then multiply by 0x0101... and shift right by 56 to sum the bytes.
; RV32 toggles vsetvli between e64 (for the arithmetic, VL=evl in a0) and
; e32 with VLMAX (to splat 32-bit constant halves); RV64 uses vand.vx/vmul.vx
; with scalar 64-bit constants instead.
993 define <8 x i64> @vp_ctpop_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
994 ; RV32-LABEL: vp_ctpop_v8i64:
996 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
997 ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
998 ; RV32-NEXT: lui a1, 349525
999 ; RV32-NEXT: addi a1, a1, 1365
1000 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1001 ; RV32-NEXT: vmv.v.x v16, a1
1002 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1003 ; RV32-NEXT: vand.vv v12, v12, v16, v0.t
1004 ; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
1005 ; RV32-NEXT: lui a1, 209715
1006 ; RV32-NEXT: addi a1, a1, 819
1007 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1008 ; RV32-NEXT: vmv.v.x v12, a1
1009 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1010 ; RV32-NEXT: vand.vv v16, v8, v12, v0.t
1011 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
1012 ; RV32-NEXT: vand.vv v8, v8, v12, v0.t
1013 ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
1014 ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
1015 ; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
1016 ; RV32-NEXT: lui a1, 61681
1017 ; RV32-NEXT: addi a1, a1, -241
1018 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1019 ; RV32-NEXT: vmv.v.x v12, a1
1020 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1021 ; RV32-NEXT: vand.vv v8, v8, v12, v0.t
1022 ; RV32-NEXT: lui a1, 4112
1023 ; RV32-NEXT: addi a1, a1, 257
1024 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1025 ; RV32-NEXT: vmv.v.x v12, a1
1026 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1027 ; RV32-NEXT: vmul.vv v8, v8, v12, v0.t
1028 ; RV32-NEXT: li a0, 56
1029 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
1032 ; RV64-LABEL: vp_ctpop_v8i64:
1034 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1035 ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
1036 ; RV64-NEXT: lui a0, 349525
1037 ; RV64-NEXT: addiw a0, a0, 1365
1038 ; RV64-NEXT: slli a1, a0, 32
1039 ; RV64-NEXT: add a0, a0, a1
1040 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t
1041 ; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
1042 ; RV64-NEXT: lui a0, 209715
1043 ; RV64-NEXT: addiw a0, a0, 819
1044 ; RV64-NEXT: slli a1, a0, 32
1045 ; RV64-NEXT: add a0, a0, a1
1046 ; RV64-NEXT: vand.vx v12, v8, a0, v0.t
1047 ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
1048 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t
1049 ; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
1050 ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
1051 ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
1052 ; RV64-NEXT: lui a0, 61681
1053 ; RV64-NEXT: addiw a0, a0, -241
1054 ; RV64-NEXT: slli a1, a0, 32
1055 ; RV64-NEXT: add a0, a0, a1
1056 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t
1057 ; RV64-NEXT: lui a0, 4112
1058 ; RV64-NEXT: addiw a0, a0, 257
1059 ; RV64-NEXT: slli a1, a0, 32
1060 ; RV64-NEXT: add a0, a0, a1
1061 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
1062 ; RV64-NEXT: li a0, 56
1063 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
1065 %v = call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> %va, <8 x i1> %m, i32 %evl)
; vp_ctpop_v8i64_unmasked -- unmasked variant of vp_ctpop_v8i64: the splatted
; all-true mask lets codegen drop v0.t from every instruction, but the
; popcount sequence and constant materialization are otherwise identical.
1069 define <8 x i64> @vp_ctpop_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
1070 ; RV32-LABEL: vp_ctpop_v8i64_unmasked:
1072 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1073 ; RV32-NEXT: vsrl.vi v12, v8, 1
1074 ; RV32-NEXT: lui a1, 349525
1075 ; RV32-NEXT: addi a1, a1, 1365
1076 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1077 ; RV32-NEXT: vmv.v.x v16, a1
1078 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1079 ; RV32-NEXT: vand.vv v12, v12, v16
1080 ; RV32-NEXT: vsub.vv v8, v8, v12
1081 ; RV32-NEXT: lui a1, 209715
1082 ; RV32-NEXT: addi a1, a1, 819
1083 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1084 ; RV32-NEXT: vmv.v.x v12, a1
1085 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1086 ; RV32-NEXT: vand.vv v16, v8, v12
1087 ; RV32-NEXT: vsrl.vi v8, v8, 2
1088 ; RV32-NEXT: vand.vv v8, v8, v12
1089 ; RV32-NEXT: vadd.vv v8, v16, v8
1090 ; RV32-NEXT: vsrl.vi v12, v8, 4
1091 ; RV32-NEXT: vadd.vv v8, v8, v12
1092 ; RV32-NEXT: lui a1, 61681
1093 ; RV32-NEXT: addi a1, a1, -241
1094 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1095 ; RV32-NEXT: vmv.v.x v12, a1
1096 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1097 ; RV32-NEXT: vand.vv v8, v8, v12
1098 ; RV32-NEXT: lui a1, 4112
1099 ; RV32-NEXT: addi a1, a1, 257
1100 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1101 ; RV32-NEXT: vmv.v.x v12, a1
1102 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1103 ; RV32-NEXT: vmul.vv v8, v8, v12
1104 ; RV32-NEXT: li a0, 56
1105 ; RV32-NEXT: vsrl.vx v8, v8, a0
1108 ; RV64-LABEL: vp_ctpop_v8i64_unmasked:
1110 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1111 ; RV64-NEXT: vsrl.vi v12, v8, 1
1112 ; RV64-NEXT: lui a0, 349525
1113 ; RV64-NEXT: addiw a0, a0, 1365
1114 ; RV64-NEXT: slli a1, a0, 32
1115 ; RV64-NEXT: add a0, a0, a1
1116 ; RV64-NEXT: vand.vx v12, v12, a0
1117 ; RV64-NEXT: vsub.vv v8, v8, v12
1118 ; RV64-NEXT: lui a0, 209715
1119 ; RV64-NEXT: addiw a0, a0, 819
1120 ; RV64-NEXT: slli a1, a0, 32
1121 ; RV64-NEXT: add a0, a0, a1
1122 ; RV64-NEXT: vand.vx v12, v8, a0
1123 ; RV64-NEXT: vsrl.vi v8, v8, 2
1124 ; RV64-NEXT: vand.vx v8, v8, a0
1125 ; RV64-NEXT: vadd.vv v8, v12, v8
1126 ; RV64-NEXT: vsrl.vi v12, v8, 4
1127 ; RV64-NEXT: vadd.vv v8, v8, v12
1128 ; RV64-NEXT: lui a0, 61681
1129 ; RV64-NEXT: addiw a0, a0, -241
1130 ; RV64-NEXT: slli a1, a0, 32
1131 ; RV64-NEXT: add a0, a0, a1
1132 ; RV64-NEXT: vand.vx v8, v8, a0
1133 ; RV64-NEXT: lui a0, 4112
1134 ; RV64-NEXT: addiw a0, a0, 257
1135 ; RV64-NEXT: slli a1, a0, 32
1136 ; RV64-NEXT: add a0, a0, a1
1137 ; RV64-NEXT: vmul.vx v8, v8, a0
1138 ; RV64-NEXT: li a0, 56
1139 ; RV64-NEXT: vsrl.vx v8, v8, a0
1141 %head = insertelement <8 x i1> poison, i1 true, i32 0
1142 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
1143 %v = call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> %va, <8 x i1> %m, i32 %evl)
1147 declare <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64>, <15 x i1>, i32)
; vp_ctpop_v15i64 -- masked VP popcount on the non-power-of-two <15 x i64>
; type (e64/m8). At m8 the RV32 lowering can no longer splat constants at
; e32 in spare registers; instead it stores each 32-bit constant half twice
; to the stack (building one 64-bit pattern per 8-byte slot) and splats it
; with a stride-0 vlse64.v. Note the splat loads use vsetivli with VL=16
; even for the 15-element type. RV64 keeps the scalar-constant form.
1149 define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
1150 ; RV32-LABEL: vp_ctpop_v15i64:
1152 ; RV32-NEXT: addi sp, sp, -32
1153 ; RV32-NEXT: .cfi_def_cfa_offset 32
1154 ; RV32-NEXT: lui a1, 349525
1155 ; RV32-NEXT: addi a1, a1, 1365
1156 ; RV32-NEXT: sw a1, 28(sp)
1157 ; RV32-NEXT: sw a1, 24(sp)
1158 ; RV32-NEXT: lui a1, 209715
1159 ; RV32-NEXT: addi a1, a1, 819
1160 ; RV32-NEXT: sw a1, 20(sp)
1161 ; RV32-NEXT: sw a1, 16(sp)
1162 ; RV32-NEXT: lui a1, 61681
1163 ; RV32-NEXT: addi a1, a1, -241
1164 ; RV32-NEXT: sw a1, 12(sp)
1165 ; RV32-NEXT: sw a1, 8(sp)
1166 ; RV32-NEXT: lui a1, 4112
1167 ; RV32-NEXT: addi a1, a1, 257
1168 ; RV32-NEXT: sw a1, 4(sp)
1169 ; RV32-NEXT: sw a1, 0(sp)
1170 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1171 ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
1172 ; RV32-NEXT: addi a1, sp, 24
1173 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1174 ; RV32-NEXT: vlse64.v v24, (a1), zero
1175 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1176 ; RV32-NEXT: vand.vv v16, v16, v24, v0.t
1177 ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
1178 ; RV32-NEXT: addi a1, sp, 16
1179 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1180 ; RV32-NEXT: vlse64.v v16, (a1), zero
1181 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1182 ; RV32-NEXT: vand.vv v24, v8, v16, v0.t
1183 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
1184 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
1185 ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
1186 ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
1187 ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
1188 ; RV32-NEXT: addi a1, sp, 8
1189 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1190 ; RV32-NEXT: vlse64.v v16, (a1), zero
1191 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1192 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
1193 ; RV32-NEXT: mv a1, sp
1194 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1195 ; RV32-NEXT: vlse64.v v16, (a1), zero
1196 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1197 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
1198 ; RV32-NEXT: li a0, 56
1199 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
1200 ; RV32-NEXT: addi sp, sp, 32
1203 ; RV64-LABEL: vp_ctpop_v15i64:
1205 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1206 ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
1207 ; RV64-NEXT: lui a0, 349525
1208 ; RV64-NEXT: addiw a0, a0, 1365
1209 ; RV64-NEXT: slli a1, a0, 32
1210 ; RV64-NEXT: add a0, a0, a1
1211 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t
1212 ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
1213 ; RV64-NEXT: lui a0, 209715
1214 ; RV64-NEXT: addiw a0, a0, 819
1215 ; RV64-NEXT: slli a1, a0, 32
1216 ; RV64-NEXT: add a0, a0, a1
1217 ; RV64-NEXT: vand.vx v16, v8, a0, v0.t
1218 ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
1219 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t
1220 ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
1221 ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
1222 ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
1223 ; RV64-NEXT: lui a0, 61681
1224 ; RV64-NEXT: addiw a0, a0, -241
1225 ; RV64-NEXT: slli a1, a0, 32
1226 ; RV64-NEXT: add a0, a0, a1
1227 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t
1228 ; RV64-NEXT: lui a0, 4112
1229 ; RV64-NEXT: addiw a0, a0, 257
1230 ; RV64-NEXT: slli a1, a0, 32
1231 ; RV64-NEXT: add a0, a0, a1
1232 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
1233 ; RV64-NEXT: li a0, 56
1234 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
1236 %v = call <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64> %va, <15 x i1> %m, i32 %evl)
; vp_ctpop_v15i64_unmasked -- unmasked variant of vp_ctpop_v15i64. Same
; stack-slot + stride-0 vlse64.v constant splats on RV32 and same scalar
; constants on RV64, just without the v0.t mask operand on each vector op.
1240 define <15 x i64> @vp_ctpop_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
1241 ; RV32-LABEL: vp_ctpop_v15i64_unmasked:
1243 ; RV32-NEXT: addi sp, sp, -32
1244 ; RV32-NEXT: .cfi_def_cfa_offset 32
1245 ; RV32-NEXT: lui a1, 349525
1246 ; RV32-NEXT: addi a1, a1, 1365
1247 ; RV32-NEXT: sw a1, 28(sp)
1248 ; RV32-NEXT: sw a1, 24(sp)
1249 ; RV32-NEXT: lui a1, 209715
1250 ; RV32-NEXT: addi a1, a1, 819
1251 ; RV32-NEXT: sw a1, 20(sp)
1252 ; RV32-NEXT: sw a1, 16(sp)
1253 ; RV32-NEXT: lui a1, 61681
1254 ; RV32-NEXT: addi a1, a1, -241
1255 ; RV32-NEXT: sw a1, 12(sp)
1256 ; RV32-NEXT: sw a1, 8(sp)
1257 ; RV32-NEXT: lui a1, 4112
1258 ; RV32-NEXT: addi a1, a1, 257
1259 ; RV32-NEXT: sw a1, 4(sp)
1260 ; RV32-NEXT: sw a1, 0(sp)
1261 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1262 ; RV32-NEXT: vsrl.vi v16, v8, 1
1263 ; RV32-NEXT: addi a1, sp, 24
1264 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1265 ; RV32-NEXT: vlse64.v v24, (a1), zero
1266 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1267 ; RV32-NEXT: vand.vv v16, v16, v24
1268 ; RV32-NEXT: vsub.vv v8, v8, v16
1269 ; RV32-NEXT: addi a1, sp, 16
1270 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1271 ; RV32-NEXT: vlse64.v v16, (a1), zero
1272 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1273 ; RV32-NEXT: vand.vv v24, v8, v16
1274 ; RV32-NEXT: vsrl.vi v8, v8, 2
1275 ; RV32-NEXT: vand.vv v8, v8, v16
1276 ; RV32-NEXT: vadd.vv v8, v24, v8
1277 ; RV32-NEXT: vsrl.vi v16, v8, 4
1278 ; RV32-NEXT: vadd.vv v8, v8, v16
1279 ; RV32-NEXT: addi a1, sp, 8
1280 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1281 ; RV32-NEXT: vlse64.v v16, (a1), zero
1282 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1283 ; RV32-NEXT: vand.vv v8, v8, v16
1284 ; RV32-NEXT: mv a1, sp
1285 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1286 ; RV32-NEXT: vlse64.v v16, (a1), zero
1287 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1288 ; RV32-NEXT: vmul.vv v8, v8, v16
1289 ; RV32-NEXT: li a0, 56
1290 ; RV32-NEXT: vsrl.vx v8, v8, a0
1291 ; RV32-NEXT: addi sp, sp, 32
1294 ; RV64-LABEL: vp_ctpop_v15i64_unmasked:
1296 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1297 ; RV64-NEXT: vsrl.vi v16, v8, 1
1298 ; RV64-NEXT: lui a0, 349525
1299 ; RV64-NEXT: addiw a0, a0, 1365
1300 ; RV64-NEXT: slli a1, a0, 32
1301 ; RV64-NEXT: add a0, a0, a1
1302 ; RV64-NEXT: vand.vx v16, v16, a0
1303 ; RV64-NEXT: vsub.vv v8, v8, v16
1304 ; RV64-NEXT: lui a0, 209715
1305 ; RV64-NEXT: addiw a0, a0, 819
1306 ; RV64-NEXT: slli a1, a0, 32
1307 ; RV64-NEXT: add a0, a0, a1
1308 ; RV64-NEXT: vand.vx v16, v8, a0
1309 ; RV64-NEXT: vsrl.vi v8, v8, 2
1310 ; RV64-NEXT: vand.vx v8, v8, a0
1311 ; RV64-NEXT: vadd.vv v8, v16, v8
1312 ; RV64-NEXT: vsrl.vi v16, v8, 4
1313 ; RV64-NEXT: vadd.vv v8, v8, v16
1314 ; RV64-NEXT: lui a0, 61681
1315 ; RV64-NEXT: addiw a0, a0, -241
1316 ; RV64-NEXT: slli a1, a0, 32
1317 ; RV64-NEXT: add a0, a0, a1
1318 ; RV64-NEXT: vand.vx v8, v8, a0
1319 ; RV64-NEXT: lui a0, 4112
1320 ; RV64-NEXT: addiw a0, a0, 257
1321 ; RV64-NEXT: slli a1, a0, 32
1322 ; RV64-NEXT: add a0, a0, a1
1323 ; RV64-NEXT: vmul.vx v8, v8, a0
1324 ; RV64-NEXT: li a0, 56
1325 ; RV64-NEXT: vsrl.vx v8, v8, a0
1327 %head = insertelement <15 x i1> poison, i1 true, i32 0
1328 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
1329 %v = call <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64> %va, <15 x i1> %m, i32 %evl)
1333 declare <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64>, <16 x i1>, i32)
; vp_ctpop_v16i64 -- masked VP popcount on <16 x i64> (e64/m8). Produces the
; same instruction sequence as the v15i64 case: on RV32 each 64-bit constant
; is assembled in two stack words and splatted with a stride-0 vlse64.v at
; VL=16, while the arithmetic runs at VL=evl (a0) under the v0.t mask.
1335 define <16 x i64> @vp_ctpop_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
1336 ; RV32-LABEL: vp_ctpop_v16i64:
1338 ; RV32-NEXT: addi sp, sp, -32
1339 ; RV32-NEXT: .cfi_def_cfa_offset 32
1340 ; RV32-NEXT: lui a1, 349525
1341 ; RV32-NEXT: addi a1, a1, 1365
1342 ; RV32-NEXT: sw a1, 28(sp)
1343 ; RV32-NEXT: sw a1, 24(sp)
1344 ; RV32-NEXT: lui a1, 209715
1345 ; RV32-NEXT: addi a1, a1, 819
1346 ; RV32-NEXT: sw a1, 20(sp)
1347 ; RV32-NEXT: sw a1, 16(sp)
1348 ; RV32-NEXT: lui a1, 61681
1349 ; RV32-NEXT: addi a1, a1, -241
1350 ; RV32-NEXT: sw a1, 12(sp)
1351 ; RV32-NEXT: sw a1, 8(sp)
1352 ; RV32-NEXT: lui a1, 4112
1353 ; RV32-NEXT: addi a1, a1, 257
1354 ; RV32-NEXT: sw a1, 4(sp)
1355 ; RV32-NEXT: sw a1, 0(sp)
1356 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1357 ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
1358 ; RV32-NEXT: addi a1, sp, 24
1359 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1360 ; RV32-NEXT: vlse64.v v24, (a1), zero
1361 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1362 ; RV32-NEXT: vand.vv v16, v16, v24, v0.t
1363 ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
1364 ; RV32-NEXT: addi a1, sp, 16
1365 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1366 ; RV32-NEXT: vlse64.v v16, (a1), zero
1367 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1368 ; RV32-NEXT: vand.vv v24, v8, v16, v0.t
1369 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
1370 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
1371 ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
1372 ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
1373 ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
1374 ; RV32-NEXT: addi a1, sp, 8
1375 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1376 ; RV32-NEXT: vlse64.v v16, (a1), zero
1377 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1378 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
1379 ; RV32-NEXT: mv a1, sp
1380 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1381 ; RV32-NEXT: vlse64.v v16, (a1), zero
1382 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1383 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
1384 ; RV32-NEXT: li a0, 56
1385 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
1386 ; RV32-NEXT: addi sp, sp, 32
1389 ; RV64-LABEL: vp_ctpop_v16i64:
1391 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1392 ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
1393 ; RV64-NEXT: lui a0, 349525
1394 ; RV64-NEXT: addiw a0, a0, 1365
1395 ; RV64-NEXT: slli a1, a0, 32
1396 ; RV64-NEXT: add a0, a0, a1
1397 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t
1398 ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
1399 ; RV64-NEXT: lui a0, 209715
1400 ; RV64-NEXT: addiw a0, a0, 819
1401 ; RV64-NEXT: slli a1, a0, 32
1402 ; RV64-NEXT: add a0, a0, a1
1403 ; RV64-NEXT: vand.vx v16, v8, a0, v0.t
1404 ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
1405 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t
1406 ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
1407 ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
1408 ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
1409 ; RV64-NEXT: lui a0, 61681
1410 ; RV64-NEXT: addiw a0, a0, -241
1411 ; RV64-NEXT: slli a1, a0, 32
1412 ; RV64-NEXT: add a0, a0, a1
1413 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t
1414 ; RV64-NEXT: lui a0, 4112
1415 ; RV64-NEXT: addiw a0, a0, 257
1416 ; RV64-NEXT: slli a1, a0, 32
1417 ; RV64-NEXT: add a0, a0, a1
1418 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
1419 ; RV64-NEXT: li a0, 56
1420 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
1422 %v = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> %va, <16 x i1> %m, i32 %evl)
; vp_ctpop_v16i64_unmasked -- unmasked variant of vp_ctpop_v16i64; identical
; constant materialization (stack + stride-0 vlse64.v on RV32, scalar regs on
; RV64) with the v0.t predication removed.
1426 define <16 x i64> @vp_ctpop_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
1427 ; RV32-LABEL: vp_ctpop_v16i64_unmasked:
1429 ; RV32-NEXT: addi sp, sp, -32
1430 ; RV32-NEXT: .cfi_def_cfa_offset 32
1431 ; RV32-NEXT: lui a1, 349525
1432 ; RV32-NEXT: addi a1, a1, 1365
1433 ; RV32-NEXT: sw a1, 28(sp)
1434 ; RV32-NEXT: sw a1, 24(sp)
1435 ; RV32-NEXT: lui a1, 209715
1436 ; RV32-NEXT: addi a1, a1, 819
1437 ; RV32-NEXT: sw a1, 20(sp)
1438 ; RV32-NEXT: sw a1, 16(sp)
1439 ; RV32-NEXT: lui a1, 61681
1440 ; RV32-NEXT: addi a1, a1, -241
1441 ; RV32-NEXT: sw a1, 12(sp)
1442 ; RV32-NEXT: sw a1, 8(sp)
1443 ; RV32-NEXT: lui a1, 4112
1444 ; RV32-NEXT: addi a1, a1, 257
1445 ; RV32-NEXT: sw a1, 4(sp)
1446 ; RV32-NEXT: sw a1, 0(sp)
1447 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1448 ; RV32-NEXT: vsrl.vi v16, v8, 1
1449 ; RV32-NEXT: addi a1, sp, 24
1450 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1451 ; RV32-NEXT: vlse64.v v24, (a1), zero
1452 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1453 ; RV32-NEXT: vand.vv v16, v16, v24
1454 ; RV32-NEXT: vsub.vv v8, v8, v16
1455 ; RV32-NEXT: addi a1, sp, 16
1456 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1457 ; RV32-NEXT: vlse64.v v16, (a1), zero
1458 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1459 ; RV32-NEXT: vand.vv v24, v8, v16
1460 ; RV32-NEXT: vsrl.vi v8, v8, 2
1461 ; RV32-NEXT: vand.vv v8, v8, v16
1462 ; RV32-NEXT: vadd.vv v8, v24, v8
1463 ; RV32-NEXT: vsrl.vi v16, v8, 4
1464 ; RV32-NEXT: vadd.vv v8, v8, v16
1465 ; RV32-NEXT: addi a1, sp, 8
1466 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1467 ; RV32-NEXT: vlse64.v v16, (a1), zero
1468 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1469 ; RV32-NEXT: vand.vv v8, v8, v16
1470 ; RV32-NEXT: mv a1, sp
1471 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1472 ; RV32-NEXT: vlse64.v v16, (a1), zero
1473 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1474 ; RV32-NEXT: vmul.vv v8, v8, v16
1475 ; RV32-NEXT: li a0, 56
1476 ; RV32-NEXT: vsrl.vx v8, v8, a0
1477 ; RV32-NEXT: addi sp, sp, 32
1480 ; RV64-LABEL: vp_ctpop_v16i64_unmasked:
1482 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1483 ; RV64-NEXT: vsrl.vi v16, v8, 1
1484 ; RV64-NEXT: lui a0, 349525
1485 ; RV64-NEXT: addiw a0, a0, 1365
1486 ; RV64-NEXT: slli a1, a0, 32
1487 ; RV64-NEXT: add a0, a0, a1
1488 ; RV64-NEXT: vand.vx v16, v16, a0
1489 ; RV64-NEXT: vsub.vv v8, v8, v16
1490 ; RV64-NEXT: lui a0, 209715
1491 ; RV64-NEXT: addiw a0, a0, 819
1492 ; RV64-NEXT: slli a1, a0, 32
1493 ; RV64-NEXT: add a0, a0, a1
1494 ; RV64-NEXT: vand.vx v16, v8, a0
1495 ; RV64-NEXT: vsrl.vi v8, v8, 2
1496 ; RV64-NEXT: vand.vx v8, v8, a0
1497 ; RV64-NEXT: vadd.vv v8, v16, v8
1498 ; RV64-NEXT: vsrl.vi v16, v8, 4
1499 ; RV64-NEXT: vadd.vv v8, v8, v16
1500 ; RV64-NEXT: lui a0, 61681
1501 ; RV64-NEXT: addiw a0, a0, -241
1502 ; RV64-NEXT: slli a1, a0, 32
1503 ; RV64-NEXT: add a0, a0, a1
1504 ; RV64-NEXT: vand.vx v8, v8, a0
1505 ; RV64-NEXT: lui a0, 4112
1506 ; RV64-NEXT: addiw a0, a0, 257
1507 ; RV64-NEXT: slli a1, a0, 32
1508 ; RV64-NEXT: add a0, a0, a1
1509 ; RV64-NEXT: vmul.vx v8, v8, a0
1510 ; RV64-NEXT: li a0, 56
1511 ; RV64-NEXT: vsrl.vx v8, v8, a0
1513 %head = insertelement <16 x i1> poison, i1 true, i32 0
1514 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
1515 %v = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> %va, <16 x i1> %m, i32 %evl)
1519 declare <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64>, <32 x i1>, i32)
; vp_ctpop_v32i64 -- masked VP popcount on <32 x i64>, which exceeds one m8
; register group and is therefore split into two <16 x i64> halves:
;  * the mask is split with vslidedown.vi (v0 -> second-half mask);
;  * the first half runs with VL = min(evl, 16) (bltu/li clamp), the second
;    with VL = max(evl - 16, 0), computed branchlessly via addi/sltu/addi/and;
;  * register pressure at m8 forces vs8r.v/vl8r.v spills of whole groups to
;    a vlenb-scaled stack area (48*vlenb on RV32, 16*vlenb on RV64).
; The popcount arithmetic per half matches the v16i64 lowering; RV64 keeps
; all four magic constants live in a1-a4 across both halves.
1521 define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
1522 ; RV32-LABEL: vp_ctpop_v32i64:
1524 ; RV32-NEXT: addi sp, sp, -48
1525 ; RV32-NEXT: .cfi_def_cfa_offset 48
1526 ; RV32-NEXT: csrr a1, vlenb
1527 ; RV32-NEXT: li a2, 48
1528 ; RV32-NEXT: mul a1, a1, a2
1529 ; RV32-NEXT: sub sp, sp, a1
1530 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
1531 ; RV32-NEXT: csrr a1, vlenb
1532 ; RV32-NEXT: li a2, 40
1533 ; RV32-NEXT: mul a1, a1, a2
1534 ; RV32-NEXT: add a1, sp, a1
1535 ; RV32-NEXT: addi a1, a1, 48
1536 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
1537 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1538 ; RV32-NEXT: vslidedown.vi v1, v0, 2
1539 ; RV32-NEXT: lui a1, 349525
1540 ; RV32-NEXT: addi a1, a1, 1365
1541 ; RV32-NEXT: sw a1, 44(sp)
1542 ; RV32-NEXT: sw a1, 40(sp)
1543 ; RV32-NEXT: lui a1, 209715
1544 ; RV32-NEXT: addi a1, a1, 819
1545 ; RV32-NEXT: sw a1, 36(sp)
1546 ; RV32-NEXT: sw a1, 32(sp)
1547 ; RV32-NEXT: lui a1, 61681
1548 ; RV32-NEXT: addi a1, a1, -241
1549 ; RV32-NEXT: sw a1, 28(sp)
1550 ; RV32-NEXT: sw a1, 24(sp)
1551 ; RV32-NEXT: lui a1, 4112
1552 ; RV32-NEXT: addi a1, a1, 257
1553 ; RV32-NEXT: sw a1, 20(sp)
1554 ; RV32-NEXT: li a2, 16
1555 ; RV32-NEXT: sw a1, 16(sp)
1556 ; RV32-NEXT: mv a1, a0
1557 ; RV32-NEXT: bltu a0, a2, .LBB34_2
1558 ; RV32-NEXT: # %bb.1:
1559 ; RV32-NEXT: li a1, 16
1560 ; RV32-NEXT: .LBB34_2:
1561 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
1562 ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
1563 ; RV32-NEXT: addi a2, sp, 40
1564 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1565 ; RV32-NEXT: csrr a3, vlenb
1566 ; RV32-NEXT: li a4, 24
1567 ; RV32-NEXT: mul a3, a3, a4
1568 ; RV32-NEXT: add a3, sp, a3
1569 ; RV32-NEXT: addi a3, a3, 48
1570 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
1571 ; RV32-NEXT: vlse64.v v8, (a2), zero
1572 ; RV32-NEXT: csrr a2, vlenb
1573 ; RV32-NEXT: slli a2, a2, 5
1574 ; RV32-NEXT: add a2, sp, a2
1575 ; RV32-NEXT: addi a2, a2, 48
1576 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
1577 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
1578 ; RV32-NEXT: vand.vv v16, v16, v8, v0.t
1579 ; RV32-NEXT: csrr a2, vlenb
1580 ; RV32-NEXT: li a3, 24
1581 ; RV32-NEXT: mul a2, a2, a3
1582 ; RV32-NEXT: add a2, sp, a2
1583 ; RV32-NEXT: addi a2, a2, 48
1584 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
1585 ; RV32-NEXT: vsub.vv v24, v8, v16, v0.t
1586 ; RV32-NEXT: addi a2, sp, 32
1587 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1588 ; RV32-NEXT: vlse64.v v8, (a2), zero
1589 ; RV32-NEXT: csrr a2, vlenb
1590 ; RV32-NEXT: li a3, 24
1591 ; RV32-NEXT: mul a2, a2, a3
1592 ; RV32-NEXT: add a2, sp, a2
1593 ; RV32-NEXT: addi a2, a2, 48
1594 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
1595 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
1596 ; RV32-NEXT: vand.vv v16, v24, v8, v0.t
1597 ; RV32-NEXT: csrr a2, vlenb
1598 ; RV32-NEXT: slli a2, a2, 4
1599 ; RV32-NEXT: add a2, sp, a2
1600 ; RV32-NEXT: addi a2, a2, 48
1601 ; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
1602 ; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t
1603 ; RV32-NEXT: vand.vv v16, v16, v8, v0.t
1604 ; RV32-NEXT: csrr a2, vlenb
1605 ; RV32-NEXT: slli a2, a2, 4
1606 ; RV32-NEXT: add a2, sp, a2
1607 ; RV32-NEXT: addi a2, a2, 48
1608 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
1609 ; RV32-NEXT: vadd.vv v16, v8, v16, v0.t
1610 ; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t
1611 ; RV32-NEXT: vadd.vv v16, v16, v8, v0.t
1612 ; RV32-NEXT: addi a2, sp, 24
1613 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1614 ; RV32-NEXT: vlse64.v v8, (a2), zero
1615 ; RV32-NEXT: csrr a2, vlenb
1616 ; RV32-NEXT: slli a2, a2, 4
1617 ; RV32-NEXT: add a2, sp, a2
1618 ; RV32-NEXT: addi a2, a2, 48
1619 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
1620 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
1621 ; RV32-NEXT: vand.vv v8, v16, v8, v0.t
1622 ; RV32-NEXT: addi a2, sp, 16
1623 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1624 ; RV32-NEXT: vlse64.v v16, (a2), zero
1625 ; RV32-NEXT: addi a2, sp, 48
1626 ; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
1627 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
1628 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
1629 ; RV32-NEXT: li a1, 56
1630 ; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t
1631 ; RV32-NEXT: csrr a2, vlenb
1632 ; RV32-NEXT: slli a2, a2, 3
1633 ; RV32-NEXT: add a2, sp, a2
1634 ; RV32-NEXT: addi a2, a2, 48
1635 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
1636 ; RV32-NEXT: addi a2, a0, -16
1637 ; RV32-NEXT: sltu a0, a0, a2
1638 ; RV32-NEXT: addi a0, a0, -1
1639 ; RV32-NEXT: and a0, a0, a2
1640 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1641 ; RV32-NEXT: vmv1r.v v0, v1
1642 ; RV32-NEXT: csrr a0, vlenb
1643 ; RV32-NEXT: li a2, 40
1644 ; RV32-NEXT: mul a0, a0, a2
1645 ; RV32-NEXT: add a0, sp, a0
1646 ; RV32-NEXT: addi a0, a0, 48
1647 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1648 ; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
1649 ; RV32-NEXT: csrr a0, vlenb
1650 ; RV32-NEXT: slli a0, a0, 5
1651 ; RV32-NEXT: add a0, sp, a0
1652 ; RV32-NEXT: addi a0, a0, 48
1653 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
1654 ; RV32-NEXT: vand.vv v8, v24, v8, v0.t
1655 ; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
1656 ; RV32-NEXT: csrr a0, vlenb
1657 ; RV32-NEXT: li a2, 24
1658 ; RV32-NEXT: mul a0, a0, a2
1659 ; RV32-NEXT: add a0, sp, a0
1660 ; RV32-NEXT: addi a0, a0, 48
1661 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1662 ; RV32-NEXT: vand.vv v24, v8, v16, v0.t
1663 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
1664 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
1665 ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
1666 ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
1667 ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
1668 ; RV32-NEXT: csrr a0, vlenb
1669 ; RV32-NEXT: slli a0, a0, 4
1670 ; RV32-NEXT: add a0, sp, a0
1671 ; RV32-NEXT: addi a0, a0, 48
1672 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1673 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
1674 ; RV32-NEXT: addi a0, sp, 48
1675 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1676 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
1677 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
1678 ; RV32-NEXT: csrr a0, vlenb
1679 ; RV32-NEXT: slli a0, a0, 3
1680 ; RV32-NEXT: add a0, sp, a0
1681 ; RV32-NEXT: addi a0, a0, 48
1682 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
1683 ; RV32-NEXT: csrr a0, vlenb
1684 ; RV32-NEXT: li a1, 48
1685 ; RV32-NEXT: mul a0, a0, a1
1686 ; RV32-NEXT: add sp, sp, a0
1687 ; RV32-NEXT: addi sp, sp, 48
1690 ; RV64-LABEL: vp_ctpop_v32i64:
1692 ; RV64-NEXT: addi sp, sp, -16
1693 ; RV64-NEXT: .cfi_def_cfa_offset 16
1694 ; RV64-NEXT: csrr a1, vlenb
1695 ; RV64-NEXT: slli a1, a1, 4
1696 ; RV64-NEXT: sub sp, sp, a1
1697 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
1698 ; RV64-NEXT: csrr a1, vlenb
1699 ; RV64-NEXT: slli a1, a1, 3
1700 ; RV64-NEXT: add a1, sp, a1
1701 ; RV64-NEXT: addi a1, a1, 16
1702 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
1703 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1704 ; RV64-NEXT: li a2, 16
1705 ; RV64-NEXT: vslidedown.vi v24, v0, 2
1706 ; RV64-NEXT: mv a1, a0
1707 ; RV64-NEXT: bltu a0, a2, .LBB34_2
1708 ; RV64-NEXT: # %bb.1:
1709 ; RV64-NEXT: li a1, 16
1710 ; RV64-NEXT: .LBB34_2:
1711 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
1712 ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
1713 ; RV64-NEXT: lui a1, 349525
1714 ; RV64-NEXT: addiw a1, a1, 1365
1715 ; RV64-NEXT: slli a2, a1, 32
1716 ; RV64-NEXT: add a1, a1, a2
1717 ; RV64-NEXT: vand.vx v16, v16, a1, v0.t
1718 ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
1719 ; RV64-NEXT: lui a2, 209715
1720 ; RV64-NEXT: addiw a2, a2, 819
1721 ; RV64-NEXT: slli a3, a2, 32
1722 ; RV64-NEXT: add a2, a2, a3
1723 ; RV64-NEXT: vand.vx v16, v8, a2, v0.t
1724 ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
1725 ; RV64-NEXT: vand.vx v8, v8, a2, v0.t
1726 ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
1727 ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
1728 ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
1729 ; RV64-NEXT: lui a3, 61681
1730 ; RV64-NEXT: addiw a3, a3, -241
1731 ; RV64-NEXT: slli a4, a3, 32
1732 ; RV64-NEXT: add a3, a3, a4
1733 ; RV64-NEXT: vand.vx v8, v8, a3, v0.t
1734 ; RV64-NEXT: lui a4, 4112
1735 ; RV64-NEXT: addiw a4, a4, 257
1736 ; RV64-NEXT: slli a5, a4, 32
1737 ; RV64-NEXT: add a4, a4, a5
1738 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t
1739 ; RV64-NEXT: li a5, 56
1740 ; RV64-NEXT: vsrl.vx v8, v8, a5, v0.t
1741 ; RV64-NEXT: addi a6, sp, 16
1742 ; RV64-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill
1743 ; RV64-NEXT: addi a6, a0, -16
1744 ; RV64-NEXT: sltu a0, a0, a6
1745 ; RV64-NEXT: addi a0, a0, -1
1746 ; RV64-NEXT: and a0, a0, a6
1747 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1748 ; RV64-NEXT: vmv1r.v v0, v24
1749 ; RV64-NEXT: csrr a0, vlenb
1750 ; RV64-NEXT: slli a0, a0, 3
1751 ; RV64-NEXT: add a0, sp, a0
1752 ; RV64-NEXT: addi a0, a0, 16
1753 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
1754 ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
1755 ; RV64-NEXT: vand.vx v16, v16, a1, v0.t
1756 ; RV64-NEXT: vsub.vv v16, v8, v16, v0.t
1757 ; RV64-NEXT: vand.vx v8, v16, a2, v0.t
1758 ; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t
1759 ; RV64-NEXT: vand.vx v16, v16, a2, v0.t
1760 ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
1761 ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
1762 ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
1763 ; RV64-NEXT: vand.vx v8, v8, a3, v0.t
1764 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t
1765 ; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t
1766 ; RV64-NEXT: addi a0, sp, 16
1767 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
1768 ; RV64-NEXT: csrr a0, vlenb
1769 ; RV64-NEXT: slli a0, a0, 4
1770 ; RV64-NEXT: add sp, sp, a0
1771 ; RV64-NEXT: addi sp, sp, 16
1773 %v = call <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl)
1777 define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
1778 ; RV32-LABEL: vp_ctpop_v32i64_unmasked:
1780 ; RV32-NEXT: addi sp, sp, -48
1781 ; RV32-NEXT: .cfi_def_cfa_offset 48
1782 ; RV32-NEXT: csrr a1, vlenb
1783 ; RV32-NEXT: li a2, 40
1784 ; RV32-NEXT: mul a1, a1, a2
1785 ; RV32-NEXT: sub sp, sp, a1
1786 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb
1787 ; RV32-NEXT: csrr a1, vlenb
1788 ; RV32-NEXT: slli a1, a1, 5
1789 ; RV32-NEXT: add a1, sp, a1
1790 ; RV32-NEXT: addi a1, a1, 48
1791 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
1792 ; RV32-NEXT: lui a1, 349525
1793 ; RV32-NEXT: addi a1, a1, 1365
1794 ; RV32-NEXT: sw a1, 44(sp)
1795 ; RV32-NEXT: sw a1, 40(sp)
1796 ; RV32-NEXT: lui a1, 209715
1797 ; RV32-NEXT: addi a1, a1, 819
1798 ; RV32-NEXT: sw a1, 36(sp)
1799 ; RV32-NEXT: sw a1, 32(sp)
1800 ; RV32-NEXT: lui a1, 61681
1801 ; RV32-NEXT: addi a1, a1, -241
1802 ; RV32-NEXT: sw a1, 28(sp)
1803 ; RV32-NEXT: sw a1, 24(sp)
1804 ; RV32-NEXT: lui a1, 4112
1805 ; RV32-NEXT: addi a1, a1, 257
1806 ; RV32-NEXT: sw a1, 20(sp)
1807 ; RV32-NEXT: li a2, 16
1808 ; RV32-NEXT: sw a1, 16(sp)
1809 ; RV32-NEXT: mv a1, a0
1810 ; RV32-NEXT: bltu a0, a2, .LBB35_2
1811 ; RV32-NEXT: # %bb.1:
1812 ; RV32-NEXT: li a1, 16
1813 ; RV32-NEXT: .LBB35_2:
1814 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
1815 ; RV32-NEXT: vsrl.vi v16, v8, 1
1816 ; RV32-NEXT: addi a2, sp, 40
1817 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1818 ; RV32-NEXT: vlse64.v v24, (a2), zero
1819 ; RV32-NEXT: csrr a2, vlenb
1820 ; RV32-NEXT: li a3, 24
1821 ; RV32-NEXT: mul a2, a2, a3
1822 ; RV32-NEXT: add a2, sp, a2
1823 ; RV32-NEXT: addi a2, a2, 48
1824 ; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
1825 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
1826 ; RV32-NEXT: vand.vv v16, v16, v24
1827 ; RV32-NEXT: vsub.vv v8, v8, v16
1828 ; RV32-NEXT: addi a2, sp, 32
1829 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1830 ; RV32-NEXT: vlse64.v v0, (a2), zero
1831 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
1832 ; RV32-NEXT: vand.vv v16, v8, v0
1833 ; RV32-NEXT: vsrl.vi v8, v8, 2
1834 ; RV32-NEXT: vand.vv v8, v8, v0
1835 ; RV32-NEXT: vadd.vv v8, v16, v8
1836 ; RV32-NEXT: vsrl.vi v16, v8, 4
1837 ; RV32-NEXT: vadd.vv v8, v8, v16
1838 ; RV32-NEXT: addi a2, sp, 24
1839 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1840 ; RV32-NEXT: vlse64.v v16, (a2), zero
1841 ; RV32-NEXT: csrr a2, vlenb
1842 ; RV32-NEXT: slli a2, a2, 4
1843 ; RV32-NEXT: add a2, sp, a2
1844 ; RV32-NEXT: addi a2, a2, 48
1845 ; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
1846 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
1847 ; RV32-NEXT: vand.vv v16, v8, v16
1848 ; RV32-NEXT: addi a2, sp, 16
1849 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1850 ; RV32-NEXT: vlse64.v v8, (a2), zero
1851 ; RV32-NEXT: addi a2, sp, 48
1852 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
1853 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
1854 ; RV32-NEXT: vmul.vv v16, v16, v8
1855 ; RV32-NEXT: li a1, 56
1856 ; RV32-NEXT: vsrl.vx v8, v16, a1
1857 ; RV32-NEXT: csrr a2, vlenb
1858 ; RV32-NEXT: slli a2, a2, 3
1859 ; RV32-NEXT: add a2, sp, a2
1860 ; RV32-NEXT: addi a2, a2, 48
1861 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
1862 ; RV32-NEXT: addi a2, a0, -16
1863 ; RV32-NEXT: sltu a0, a0, a2
1864 ; RV32-NEXT: addi a0, a0, -1
1865 ; RV32-NEXT: and a0, a0, a2
1866 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1867 ; RV32-NEXT: csrr a0, vlenb
1868 ; RV32-NEXT: slli a0, a0, 5
1869 ; RV32-NEXT: add a0, sp, a0
1870 ; RV32-NEXT: addi a0, a0, 48
1871 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
1872 ; RV32-NEXT: vsrl.vi v16, v8, 1
1873 ; RV32-NEXT: csrr a0, vlenb
1874 ; RV32-NEXT: li a2, 24
1875 ; RV32-NEXT: mul a0, a0, a2
1876 ; RV32-NEXT: add a0, sp, a0
1877 ; RV32-NEXT: addi a0, a0, 48
1878 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
1879 ; RV32-NEXT: vand.vv v16, v16, v24
1880 ; RV32-NEXT: vsub.vv v16, v8, v16
1881 ; RV32-NEXT: vand.vv v8, v16, v0
1882 ; RV32-NEXT: vsrl.vi v16, v16, 2
1883 ; RV32-NEXT: vand.vv v16, v16, v0
1884 ; RV32-NEXT: vadd.vv v8, v8, v16
1885 ; RV32-NEXT: vsrl.vi v16, v8, 4
1886 ; RV32-NEXT: vadd.vv v8, v8, v16
1887 ; RV32-NEXT: csrr a0, vlenb
1888 ; RV32-NEXT: slli a0, a0, 4
1889 ; RV32-NEXT: add a0, sp, a0
1890 ; RV32-NEXT: addi a0, a0, 48
1891 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1892 ; RV32-NEXT: vand.vv v8, v8, v16
1893 ; RV32-NEXT: addi a0, sp, 48
1894 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1895 ; RV32-NEXT: vmul.vv v8, v8, v16
1896 ; RV32-NEXT: vsrl.vx v16, v8, a1
1897 ; RV32-NEXT: csrr a0, vlenb
1898 ; RV32-NEXT: slli a0, a0, 3
1899 ; RV32-NEXT: add a0, sp, a0
1900 ; RV32-NEXT: addi a0, a0, 48
1901 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
1902 ; RV32-NEXT: csrr a0, vlenb
1903 ; RV32-NEXT: li a1, 40
1904 ; RV32-NEXT: mul a0, a0, a1
1905 ; RV32-NEXT: add sp, sp, a0
1906 ; RV32-NEXT: addi sp, sp, 48
1909 ; RV64-LABEL: vp_ctpop_v32i64_unmasked:
1911 ; RV64-NEXT: li a2, 16
1912 ; RV64-NEXT: mv a1, a0
1913 ; RV64-NEXT: bltu a0, a2, .LBB35_2
1914 ; RV64-NEXT: # %bb.1:
1915 ; RV64-NEXT: li a1, 16
1916 ; RV64-NEXT: .LBB35_2:
1917 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
1918 ; RV64-NEXT: vsrl.vi v24, v8, 1
1919 ; RV64-NEXT: lui a1, 349525
1920 ; RV64-NEXT: addiw a1, a1, 1365
1921 ; RV64-NEXT: slli a2, a1, 32
1922 ; RV64-NEXT: add a1, a1, a2
1923 ; RV64-NEXT: vand.vx v24, v24, a1
1924 ; RV64-NEXT: vsub.vv v8, v8, v24
1925 ; RV64-NEXT: lui a2, 209715
1926 ; RV64-NEXT: addiw a2, a2, 819
1927 ; RV64-NEXT: slli a3, a2, 32
1928 ; RV64-NEXT: add a2, a2, a3
1929 ; RV64-NEXT: vand.vx v24, v8, a2
1930 ; RV64-NEXT: vsrl.vi v8, v8, 2
1931 ; RV64-NEXT: vand.vx v8, v8, a2
1932 ; RV64-NEXT: vadd.vv v8, v24, v8
1933 ; RV64-NEXT: vsrl.vi v24, v8, 4
1934 ; RV64-NEXT: vadd.vv v8, v8, v24
1935 ; RV64-NEXT: lui a3, 61681
1936 ; RV64-NEXT: addiw a3, a3, -241
1937 ; RV64-NEXT: slli a4, a3, 32
1938 ; RV64-NEXT: add a3, a3, a4
1939 ; RV64-NEXT: vand.vx v8, v8, a3
1940 ; RV64-NEXT: lui a4, 4112
1941 ; RV64-NEXT: addiw a4, a4, 257
1942 ; RV64-NEXT: slli a5, a4, 32
1943 ; RV64-NEXT: add a4, a4, a5
1944 ; RV64-NEXT: vmul.vx v8, v8, a4
1945 ; RV64-NEXT: li a5, 56
1946 ; RV64-NEXT: vsrl.vx v8, v8, a5
1947 ; RV64-NEXT: addi a6, a0, -16
1948 ; RV64-NEXT: sltu a0, a0, a6
1949 ; RV64-NEXT: addi a0, a0, -1
1950 ; RV64-NEXT: and a0, a0, a6
1951 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1952 ; RV64-NEXT: vsrl.vi v24, v16, 1
1953 ; RV64-NEXT: vand.vx v24, v24, a1
1954 ; RV64-NEXT: vsub.vv v16, v16, v24
1955 ; RV64-NEXT: vand.vx v24, v16, a2
1956 ; RV64-NEXT: vsrl.vi v16, v16, 2
1957 ; RV64-NEXT: vand.vx v16, v16, a2
1958 ; RV64-NEXT: vadd.vv v16, v24, v16
1959 ; RV64-NEXT: vsrl.vi v24, v16, 4
1960 ; RV64-NEXT: vadd.vv v16, v16, v24
1961 ; RV64-NEXT: vand.vx v16, v16, a3
1962 ; RV64-NEXT: vmul.vx v16, v16, a4
1963 ; RV64-NEXT: vsrl.vx v16, v16, a5
1965 %head = insertelement <32 x i1> poison, i1 true, i32 0
1966 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
1967 %v = call <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl)