1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
4 ; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
6 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb,+m -target-abi=ilp32d \
7 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
8 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb,+m -target-abi=lp64d \
9 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
; External declaration of the masked VP population-count intrinsic for <vscale x 1 x i8>.
11 declare <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)
13 define <vscale x 1 x i8> @vp_ctpop_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
14 ; CHECK-LABEL: vp_ctpop_nxv1i8:
16 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
17 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
18 ; CHECK-NEXT: li a0, 85
19 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
20 ; CHECK-NEXT: li a0, 51
21 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
22 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
23 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
24 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
25 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
26 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
27 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
28 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
31 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i8:
32 ; CHECK-ZVBB: # %bb.0:
33 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
34 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
35 ; CHECK-ZVBB-NEXT: ret
36 %v = call <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
37 ret <vscale x 1 x i8> %v
40 define <vscale x 1 x i8> @vp_ctpop_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
41 ; CHECK-LABEL: vp_ctpop_nxv1i8_unmasked:
43 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
44 ; CHECK-NEXT: vsrl.vi v9, v8, 1
45 ; CHECK-NEXT: li a0, 85
46 ; CHECK-NEXT: vand.vx v9, v9, a0
47 ; CHECK-NEXT: li a0, 51
48 ; CHECK-NEXT: vsub.vv v8, v8, v9
49 ; CHECK-NEXT: vand.vx v9, v8, a0
50 ; CHECK-NEXT: vsrl.vi v8, v8, 2
51 ; CHECK-NEXT: vand.vx v8, v8, a0
52 ; CHECK-NEXT: vadd.vv v8, v9, v8
53 ; CHECK-NEXT: vsrl.vi v9, v8, 4
54 ; CHECK-NEXT: vadd.vv v8, v8, v9
55 ; CHECK-NEXT: vand.vi v8, v8, 15
58 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i8_unmasked:
59 ; CHECK-ZVBB: # %bb.0:
60 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
61 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
62 ; CHECK-ZVBB-NEXT: ret
63 %v = call <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
64 ret <vscale x 1 x i8> %v
; External declaration of the masked VP population-count intrinsic for <vscale x 2 x i8>.
67 declare <vscale x 2 x i8> @llvm.vp.ctpop.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
69 define <vscale x 2 x i8> @vp_ctpop_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
70 ; CHECK-LABEL: vp_ctpop_nxv2i8:
72 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
73 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
74 ; CHECK-NEXT: li a0, 85
75 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
76 ; CHECK-NEXT: li a0, 51
77 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
78 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
79 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
80 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
81 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
82 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
83 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
84 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
87 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i8:
88 ; CHECK-ZVBB: # %bb.0:
89 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
90 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
91 ; CHECK-ZVBB-NEXT: ret
92 %v = call <vscale x 2 x i8> @llvm.vp.ctpop.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
93 ret <vscale x 2 x i8> %v
96 define <vscale x 2 x i8> @vp_ctpop_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
97 ; CHECK-LABEL: vp_ctpop_nxv2i8_unmasked:
99 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
100 ; CHECK-NEXT: vsrl.vi v9, v8, 1
101 ; CHECK-NEXT: li a0, 85
102 ; CHECK-NEXT: vand.vx v9, v9, a0
103 ; CHECK-NEXT: li a0, 51
104 ; CHECK-NEXT: vsub.vv v8, v8, v9
105 ; CHECK-NEXT: vand.vx v9, v8, a0
106 ; CHECK-NEXT: vsrl.vi v8, v8, 2
107 ; CHECK-NEXT: vand.vx v8, v8, a0
108 ; CHECK-NEXT: vadd.vv v8, v9, v8
109 ; CHECK-NEXT: vsrl.vi v9, v8, 4
110 ; CHECK-NEXT: vadd.vv v8, v8, v9
111 ; CHECK-NEXT: vand.vi v8, v8, 15
114 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i8_unmasked:
115 ; CHECK-ZVBB: # %bb.0:
116 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
117 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
118 ; CHECK-ZVBB-NEXT: ret
119 %v = call <vscale x 2 x i8> @llvm.vp.ctpop.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
120 ret <vscale x 2 x i8> %v
; External declaration of the masked VP population-count intrinsic for <vscale x 4 x i8>.
123 declare <vscale x 4 x i8> @llvm.vp.ctpop.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32)
125 define <vscale x 4 x i8> @vp_ctpop_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
126 ; CHECK-LABEL: vp_ctpop_nxv4i8:
128 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
129 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
130 ; CHECK-NEXT: li a0, 85
131 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
132 ; CHECK-NEXT: li a0, 51
133 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
134 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
135 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
136 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
137 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
138 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
139 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
140 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
143 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i8:
144 ; CHECK-ZVBB: # %bb.0:
145 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
146 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
147 ; CHECK-ZVBB-NEXT: ret
148 %v = call <vscale x 4 x i8> @llvm.vp.ctpop.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
149 ret <vscale x 4 x i8> %v
152 define <vscale x 4 x i8> @vp_ctpop_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
153 ; CHECK-LABEL: vp_ctpop_nxv4i8_unmasked:
155 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
156 ; CHECK-NEXT: vsrl.vi v9, v8, 1
157 ; CHECK-NEXT: li a0, 85
158 ; CHECK-NEXT: vand.vx v9, v9, a0
159 ; CHECK-NEXT: li a0, 51
160 ; CHECK-NEXT: vsub.vv v8, v8, v9
161 ; CHECK-NEXT: vand.vx v9, v8, a0
162 ; CHECK-NEXT: vsrl.vi v8, v8, 2
163 ; CHECK-NEXT: vand.vx v8, v8, a0
164 ; CHECK-NEXT: vadd.vv v8, v9, v8
165 ; CHECK-NEXT: vsrl.vi v9, v8, 4
166 ; CHECK-NEXT: vadd.vv v8, v8, v9
167 ; CHECK-NEXT: vand.vi v8, v8, 15
170 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i8_unmasked:
171 ; CHECK-ZVBB: # %bb.0:
172 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
173 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
174 ; CHECK-ZVBB-NEXT: ret
175 %v = call <vscale x 4 x i8> @llvm.vp.ctpop.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
176 ret <vscale x 4 x i8> %v
; External declaration of the masked VP population-count intrinsic for <vscale x 8 x i8>.
179 declare <vscale x 8 x i8> @llvm.vp.ctpop.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i32)
181 define <vscale x 8 x i8> @vp_ctpop_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
182 ; CHECK-LABEL: vp_ctpop_nxv8i8:
184 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
185 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
186 ; CHECK-NEXT: li a0, 85
187 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
188 ; CHECK-NEXT: li a0, 51
189 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
190 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
191 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
192 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
193 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
194 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
195 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
196 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
199 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i8:
200 ; CHECK-ZVBB: # %bb.0:
201 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma
202 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
203 ; CHECK-ZVBB-NEXT: ret
204 %v = call <vscale x 8 x i8> @llvm.vp.ctpop.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl)
205 ret <vscale x 8 x i8> %v
208 define <vscale x 8 x i8> @vp_ctpop_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
209 ; CHECK-LABEL: vp_ctpop_nxv8i8_unmasked:
211 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
212 ; CHECK-NEXT: vsrl.vi v9, v8, 1
213 ; CHECK-NEXT: li a0, 85
214 ; CHECK-NEXT: vand.vx v9, v9, a0
215 ; CHECK-NEXT: li a0, 51
216 ; CHECK-NEXT: vsub.vv v8, v8, v9
217 ; CHECK-NEXT: vand.vx v9, v8, a0
218 ; CHECK-NEXT: vsrl.vi v8, v8, 2
219 ; CHECK-NEXT: vand.vx v8, v8, a0
220 ; CHECK-NEXT: vadd.vv v8, v9, v8
221 ; CHECK-NEXT: vsrl.vi v9, v8, 4
222 ; CHECK-NEXT: vadd.vv v8, v8, v9
223 ; CHECK-NEXT: vand.vi v8, v8, 15
226 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i8_unmasked:
227 ; CHECK-ZVBB: # %bb.0:
228 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma
229 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
230 ; CHECK-ZVBB-NEXT: ret
231 %v = call <vscale x 8 x i8> @llvm.vp.ctpop.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
232 ret <vscale x 8 x i8> %v
; External declaration of the masked VP population-count intrinsic for <vscale x 16 x i8>.
235 declare <vscale x 16 x i8> @llvm.vp.ctpop.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32)
237 define <vscale x 16 x i8> @vp_ctpop_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
238 ; CHECK-LABEL: vp_ctpop_nxv16i8:
240 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
241 ; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
242 ; CHECK-NEXT: li a0, 85
243 ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
244 ; CHECK-NEXT: li a0, 51
245 ; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
246 ; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
247 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
248 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
249 ; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
250 ; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
251 ; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
252 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
255 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i8:
256 ; CHECK-ZVBB: # %bb.0:
257 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma
258 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
259 ; CHECK-ZVBB-NEXT: ret
260 %v = call <vscale x 16 x i8> @llvm.vp.ctpop.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 %evl)
261 ret <vscale x 16 x i8> %v
264 define <vscale x 16 x i8> @vp_ctpop_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
265 ; CHECK-LABEL: vp_ctpop_nxv16i8_unmasked:
267 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
268 ; CHECK-NEXT: vsrl.vi v10, v8, 1
269 ; CHECK-NEXT: li a0, 85
270 ; CHECK-NEXT: vand.vx v10, v10, a0
271 ; CHECK-NEXT: li a0, 51
272 ; CHECK-NEXT: vsub.vv v8, v8, v10
273 ; CHECK-NEXT: vand.vx v10, v8, a0
274 ; CHECK-NEXT: vsrl.vi v8, v8, 2
275 ; CHECK-NEXT: vand.vx v8, v8, a0
276 ; CHECK-NEXT: vadd.vv v8, v10, v8
277 ; CHECK-NEXT: vsrl.vi v10, v8, 4
278 ; CHECK-NEXT: vadd.vv v8, v8, v10
279 ; CHECK-NEXT: vand.vi v8, v8, 15
282 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i8_unmasked:
283 ; CHECK-ZVBB: # %bb.0:
284 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma
285 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
286 ; CHECK-ZVBB-NEXT: ret
287 %v = call <vscale x 16 x i8> @llvm.vp.ctpop.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
288 ret <vscale x 16 x i8> %v
; External declaration of the masked VP population-count intrinsic for <vscale x 32 x i8>.
291 declare <vscale x 32 x i8> @llvm.vp.ctpop.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i1>, i32)
293 define <vscale x 32 x i8> @vp_ctpop_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
294 ; CHECK-LABEL: vp_ctpop_nxv32i8:
296 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
297 ; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
298 ; CHECK-NEXT: li a0, 85
299 ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
300 ; CHECK-NEXT: li a0, 51
301 ; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
302 ; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
303 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
304 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
305 ; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t
306 ; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
307 ; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
308 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
311 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i8:
312 ; CHECK-ZVBB: # %bb.0:
313 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma
314 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
315 ; CHECK-ZVBB-NEXT: ret
316 %v = call <vscale x 32 x i8> @llvm.vp.ctpop.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 %evl)
317 ret <vscale x 32 x i8> %v
320 define <vscale x 32 x i8> @vp_ctpop_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
321 ; CHECK-LABEL: vp_ctpop_nxv32i8_unmasked:
323 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
324 ; CHECK-NEXT: vsrl.vi v12, v8, 1
325 ; CHECK-NEXT: li a0, 85
326 ; CHECK-NEXT: vand.vx v12, v12, a0
327 ; CHECK-NEXT: li a0, 51
328 ; CHECK-NEXT: vsub.vv v8, v8, v12
329 ; CHECK-NEXT: vand.vx v12, v8, a0
330 ; CHECK-NEXT: vsrl.vi v8, v8, 2
331 ; CHECK-NEXT: vand.vx v8, v8, a0
332 ; CHECK-NEXT: vadd.vv v8, v12, v8
333 ; CHECK-NEXT: vsrl.vi v12, v8, 4
334 ; CHECK-NEXT: vadd.vv v8, v8, v12
335 ; CHECK-NEXT: vand.vi v8, v8, 15
338 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i8_unmasked:
339 ; CHECK-ZVBB: # %bb.0:
340 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma
341 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
342 ; CHECK-ZVBB-NEXT: ret
343 %v = call <vscale x 32 x i8> @llvm.vp.ctpop.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
344 ret <vscale x 32 x i8> %v
; External declaration of the masked VP population-count intrinsic for <vscale x 64 x i8>.
347 declare <vscale x 64 x i8> @llvm.vp.ctpop.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i1>, i32)
349 define <vscale x 64 x i8> @vp_ctpop_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
350 ; CHECK-LABEL: vp_ctpop_nxv64i8:
352 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
353 ; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
354 ; CHECK-NEXT: li a0, 85
355 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
356 ; CHECK-NEXT: li a0, 51
357 ; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t
358 ; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
359 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
360 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
361 ; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t
362 ; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
363 ; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
364 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
367 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv64i8:
368 ; CHECK-ZVBB: # %bb.0:
369 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma
370 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
371 ; CHECK-ZVBB-NEXT: ret
372 %v = call <vscale x 64 x i8> @llvm.vp.ctpop.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 %evl)
373 ret <vscale x 64 x i8> %v
376 define <vscale x 64 x i8> @vp_ctpop_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
377 ; CHECK-LABEL: vp_ctpop_nxv64i8_unmasked:
379 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
380 ; CHECK-NEXT: vsrl.vi v16, v8, 1
381 ; CHECK-NEXT: li a0, 85
382 ; CHECK-NEXT: vand.vx v16, v16, a0
383 ; CHECK-NEXT: li a0, 51
384 ; CHECK-NEXT: vsub.vv v8, v8, v16
385 ; CHECK-NEXT: vand.vx v16, v8, a0
386 ; CHECK-NEXT: vsrl.vi v8, v8, 2
387 ; CHECK-NEXT: vand.vx v8, v8, a0
388 ; CHECK-NEXT: vadd.vv v8, v16, v8
389 ; CHECK-NEXT: vsrl.vi v16, v8, 4
390 ; CHECK-NEXT: vadd.vv v8, v8, v16
391 ; CHECK-NEXT: vand.vi v8, v8, 15
394 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv64i8_unmasked:
395 ; CHECK-ZVBB: # %bb.0:
396 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma
397 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
398 ; CHECK-ZVBB-NEXT: ret
399 %v = call <vscale x 64 x i8> @llvm.vp.ctpop.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> splat (i1 true), i32 %evl)
400 ret <vscale x 64 x i8> %v
; External declaration of the masked VP population-count intrinsic for <vscale x 1 x i16>.
403 declare <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
405 define <vscale x 1 x i16> @vp_ctpop_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
406 ; CHECK-LABEL: vp_ctpop_nxv1i16:
408 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
409 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
410 ; CHECK-NEXT: lui a0, 5
411 ; CHECK-NEXT: addi a0, a0, 1365
412 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
413 ; CHECK-NEXT: lui a0, 3
414 ; CHECK-NEXT: addi a0, a0, 819
415 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
416 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
417 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
418 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
419 ; CHECK-NEXT: lui a0, 1
420 ; CHECK-NEXT: addi a0, a0, -241
421 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
422 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
423 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
424 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
425 ; CHECK-NEXT: li a0, 257
426 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
427 ; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
430 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i16:
431 ; CHECK-ZVBB: # %bb.0:
432 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
433 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
434 ; CHECK-ZVBB-NEXT: ret
435 %v = call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
436 ret <vscale x 1 x i16> %v
439 define <vscale x 1 x i16> @vp_ctpop_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
440 ; CHECK-LABEL: vp_ctpop_nxv1i16_unmasked:
442 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
443 ; CHECK-NEXT: vsrl.vi v9, v8, 1
444 ; CHECK-NEXT: lui a0, 5
445 ; CHECK-NEXT: addi a0, a0, 1365
446 ; CHECK-NEXT: vand.vx v9, v9, a0
447 ; CHECK-NEXT: lui a0, 3
448 ; CHECK-NEXT: addi a0, a0, 819
449 ; CHECK-NEXT: vsub.vv v8, v8, v9
450 ; CHECK-NEXT: vand.vx v9, v8, a0
451 ; CHECK-NEXT: vsrl.vi v8, v8, 2
452 ; CHECK-NEXT: vand.vx v8, v8, a0
453 ; CHECK-NEXT: lui a0, 1
454 ; CHECK-NEXT: addi a0, a0, -241
455 ; CHECK-NEXT: vadd.vv v8, v9, v8
456 ; CHECK-NEXT: vsrl.vi v9, v8, 4
457 ; CHECK-NEXT: vadd.vv v8, v8, v9
458 ; CHECK-NEXT: vand.vx v8, v8, a0
459 ; CHECK-NEXT: li a0, 257
460 ; CHECK-NEXT: vmul.vx v8, v8, a0
461 ; CHECK-NEXT: vsrl.vi v8, v8, 8
464 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i16_unmasked:
465 ; CHECK-ZVBB: # %bb.0:
466 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
467 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
468 ; CHECK-ZVBB-NEXT: ret
469 %v = call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
470 ret <vscale x 1 x i16> %v
; External declaration of the masked VP population-count intrinsic for <vscale x 2 x i16>.
473 declare <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
475 define <vscale x 2 x i16> @vp_ctpop_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
476 ; CHECK-LABEL: vp_ctpop_nxv2i16:
478 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
479 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
480 ; CHECK-NEXT: lui a0, 5
481 ; CHECK-NEXT: addi a0, a0, 1365
482 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
483 ; CHECK-NEXT: lui a0, 3
484 ; CHECK-NEXT: addi a0, a0, 819
485 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
486 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
487 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
488 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
489 ; CHECK-NEXT: lui a0, 1
490 ; CHECK-NEXT: addi a0, a0, -241
491 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
492 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
493 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
494 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
495 ; CHECK-NEXT: li a0, 257
496 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
497 ; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
500 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i16:
501 ; CHECK-ZVBB: # %bb.0:
502 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
503 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
504 ; CHECK-ZVBB-NEXT: ret
505 %v = call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
506 ret <vscale x 2 x i16> %v
509 define <vscale x 2 x i16> @vp_ctpop_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
510 ; CHECK-LABEL: vp_ctpop_nxv2i16_unmasked:
512 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
513 ; CHECK-NEXT: vsrl.vi v9, v8, 1
514 ; CHECK-NEXT: lui a0, 5
515 ; CHECK-NEXT: addi a0, a0, 1365
516 ; CHECK-NEXT: vand.vx v9, v9, a0
517 ; CHECK-NEXT: lui a0, 3
518 ; CHECK-NEXT: addi a0, a0, 819
519 ; CHECK-NEXT: vsub.vv v8, v8, v9
520 ; CHECK-NEXT: vand.vx v9, v8, a0
521 ; CHECK-NEXT: vsrl.vi v8, v8, 2
522 ; CHECK-NEXT: vand.vx v8, v8, a0
523 ; CHECK-NEXT: lui a0, 1
524 ; CHECK-NEXT: addi a0, a0, -241
525 ; CHECK-NEXT: vadd.vv v8, v9, v8
526 ; CHECK-NEXT: vsrl.vi v9, v8, 4
527 ; CHECK-NEXT: vadd.vv v8, v8, v9
528 ; CHECK-NEXT: vand.vx v8, v8, a0
529 ; CHECK-NEXT: li a0, 257
530 ; CHECK-NEXT: vmul.vx v8, v8, a0
531 ; CHECK-NEXT: vsrl.vi v8, v8, 8
534 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i16_unmasked:
535 ; CHECK-ZVBB: # %bb.0:
536 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
537 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
538 ; CHECK-ZVBB-NEXT: ret
539 %v = call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
540 ret <vscale x 2 x i16> %v
; External declaration of the masked VP population-count intrinsic for <vscale x 4 x i16>.
543 declare <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
545 define <vscale x 4 x i16> @vp_ctpop_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
546 ; CHECK-LABEL: vp_ctpop_nxv4i16:
548 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
549 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
550 ; CHECK-NEXT: lui a0, 5
551 ; CHECK-NEXT: addi a0, a0, 1365
552 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
553 ; CHECK-NEXT: lui a0, 3
554 ; CHECK-NEXT: addi a0, a0, 819
555 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
556 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
557 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
558 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
559 ; CHECK-NEXT: lui a0, 1
560 ; CHECK-NEXT: addi a0, a0, -241
561 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
562 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
563 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
564 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
565 ; CHECK-NEXT: li a0, 257
566 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
567 ; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
570 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i16:
571 ; CHECK-ZVBB: # %bb.0:
572 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
573 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
574 ; CHECK-ZVBB-NEXT: ret
575 %v = call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
576 ret <vscale x 4 x i16> %v
579 define <vscale x 4 x i16> @vp_ctpop_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
580 ; CHECK-LABEL: vp_ctpop_nxv4i16_unmasked:
582 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
583 ; CHECK-NEXT: vsrl.vi v9, v8, 1
584 ; CHECK-NEXT: lui a0, 5
585 ; CHECK-NEXT: addi a0, a0, 1365
586 ; CHECK-NEXT: vand.vx v9, v9, a0
587 ; CHECK-NEXT: lui a0, 3
588 ; CHECK-NEXT: addi a0, a0, 819
589 ; CHECK-NEXT: vsub.vv v8, v8, v9
590 ; CHECK-NEXT: vand.vx v9, v8, a0
591 ; CHECK-NEXT: vsrl.vi v8, v8, 2
592 ; CHECK-NEXT: vand.vx v8, v8, a0
593 ; CHECK-NEXT: lui a0, 1
594 ; CHECK-NEXT: addi a0, a0, -241
595 ; CHECK-NEXT: vadd.vv v8, v9, v8
596 ; CHECK-NEXT: vsrl.vi v9, v8, 4
597 ; CHECK-NEXT: vadd.vv v8, v8, v9
598 ; CHECK-NEXT: vand.vx v8, v8, a0
599 ; CHECK-NEXT: li a0, 257
600 ; CHECK-NEXT: vmul.vx v8, v8, a0
601 ; CHECK-NEXT: vsrl.vi v8, v8, 8
604 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i16_unmasked:
605 ; CHECK-ZVBB: # %bb.0:
606 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
607 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
608 ; CHECK-ZVBB-NEXT: ret
609 %v = call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
610 ret <vscale x 4 x i16> %v
; External declaration of the masked VP population-count intrinsic for <vscale x 8 x i16>.
613 declare <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
615 define <vscale x 8 x i16> @vp_ctpop_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
616 ; CHECK-LABEL: vp_ctpop_nxv8i16:
618 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
619 ; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
620 ; CHECK-NEXT: lui a0, 5
621 ; CHECK-NEXT: addi a0, a0, 1365
622 ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
623 ; CHECK-NEXT: lui a0, 3
624 ; CHECK-NEXT: addi a0, a0, 819
625 ; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
626 ; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
627 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
628 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
629 ; CHECK-NEXT: lui a0, 1
630 ; CHECK-NEXT: addi a0, a0, -241
631 ; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
632 ; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
633 ; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
634 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
635 ; CHECK-NEXT: li a0, 257
636 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
637 ; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
640 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i16:
641 ; CHECK-ZVBB: # %bb.0:
642 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
643 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
644 ; CHECK-ZVBB-NEXT: ret
645 %v = call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl)
646 ret <vscale x 8 x i16> %v
649 define <vscale x 8 x i16> @vp_ctpop_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
650 ; CHECK-LABEL: vp_ctpop_nxv8i16_unmasked:
652 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
653 ; CHECK-NEXT: vsrl.vi v10, v8, 1
654 ; CHECK-NEXT: lui a0, 5
655 ; CHECK-NEXT: addi a0, a0, 1365
656 ; CHECK-NEXT: vand.vx v10, v10, a0
657 ; CHECK-NEXT: lui a0, 3
658 ; CHECK-NEXT: addi a0, a0, 819
659 ; CHECK-NEXT: vsub.vv v8, v8, v10
660 ; CHECK-NEXT: vand.vx v10, v8, a0
661 ; CHECK-NEXT: vsrl.vi v8, v8, 2
662 ; CHECK-NEXT: vand.vx v8, v8, a0
663 ; CHECK-NEXT: lui a0, 1
664 ; CHECK-NEXT: addi a0, a0, -241
665 ; CHECK-NEXT: vadd.vv v8, v10, v8
666 ; CHECK-NEXT: vsrl.vi v10, v8, 4
667 ; CHECK-NEXT: vadd.vv v8, v8, v10
668 ; CHECK-NEXT: vand.vx v8, v8, a0
669 ; CHECK-NEXT: li a0, 257
670 ; CHECK-NEXT: vmul.vx v8, v8, a0
671 ; CHECK-NEXT: vsrl.vi v8, v8, 8
674 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i16_unmasked:
675 ; CHECK-ZVBB: # %bb.0:
676 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
677 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
678 ; CHECK-ZVBB-NEXT: ret
679 %v = call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
680 ret <vscale x 8 x i16> %v
; External declaration of the masked VP population-count intrinsic for <vscale x 16 x i16>.
683 declare <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
685 define <vscale x 16 x i16> @vp_ctpop_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
686 ; CHECK-LABEL: vp_ctpop_nxv16i16:
688 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
689 ; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
690 ; CHECK-NEXT: lui a0, 5
691 ; CHECK-NEXT: addi a0, a0, 1365
692 ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
693 ; CHECK-NEXT: lui a0, 3
694 ; CHECK-NEXT: addi a0, a0, 819
695 ; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
696 ; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
697 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
698 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
699 ; CHECK-NEXT: lui a0, 1
700 ; CHECK-NEXT: addi a0, a0, -241
701 ; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t
702 ; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
703 ; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
704 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
705 ; CHECK-NEXT: li a0, 257
706 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
707 ; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
710 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i16:
711 ; CHECK-ZVBB: # %bb.0:
712 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma
713 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
714 ; CHECK-ZVBB-NEXT: ret
715 %v = call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl)
716 ret <vscale x 16 x i16> %v
719 define <vscale x 16 x i16> @vp_ctpop_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
720 ; CHECK-LABEL: vp_ctpop_nxv16i16_unmasked:
722 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
723 ; CHECK-NEXT: vsrl.vi v12, v8, 1
724 ; CHECK-NEXT: lui a0, 5
725 ; CHECK-NEXT: addi a0, a0, 1365
726 ; CHECK-NEXT: vand.vx v12, v12, a0
727 ; CHECK-NEXT: lui a0, 3
728 ; CHECK-NEXT: addi a0, a0, 819
729 ; CHECK-NEXT: vsub.vv v8, v8, v12
730 ; CHECK-NEXT: vand.vx v12, v8, a0
731 ; CHECK-NEXT: vsrl.vi v8, v8, 2
732 ; CHECK-NEXT: vand.vx v8, v8, a0
733 ; CHECK-NEXT: lui a0, 1
734 ; CHECK-NEXT: addi a0, a0, -241
735 ; CHECK-NEXT: vadd.vv v8, v12, v8
736 ; CHECK-NEXT: vsrl.vi v12, v8, 4
737 ; CHECK-NEXT: vadd.vv v8, v8, v12
738 ; CHECK-NEXT: vand.vx v8, v8, a0
739 ; CHECK-NEXT: li a0, 257
740 ; CHECK-NEXT: vmul.vx v8, v8, a0
741 ; CHECK-NEXT: vsrl.vi v8, v8, 8
744 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i16_unmasked:
745 ; CHECK-ZVBB: # %bb.0:
746 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma
747 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
748 ; CHECK-ZVBB-NEXT: ret
749 %v = call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
750 ret <vscale x 16 x i16> %v
; External declaration of the masked VP population-count intrinsic for <vscale x 32 x i16>.
753 declare <vscale x 32 x i16> @llvm.vp.ctpop.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i1>, i32)
755 define <vscale x 32 x i16> @vp_ctpop_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
756 ; CHECK-LABEL: vp_ctpop_nxv32i16:
758 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
759 ; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
760 ; CHECK-NEXT: lui a0, 5
761 ; CHECK-NEXT: addi a0, a0, 1365
762 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
763 ; CHECK-NEXT: lui a0, 3
764 ; CHECK-NEXT: addi a0, a0, 819
765 ; CHECK-NEXT: vsub.vv v16, v8, v16, v0.t
766 ; CHECK-NEXT: vand.vx v8, v16, a0, v0.t
767 ; CHECK-NEXT: vsrl.vi v16, v16, 2, v0.t
768 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
769 ; CHECK-NEXT: lui a0, 1
770 ; CHECK-NEXT: addi a0, a0, -241
771 ; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
772 ; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
773 ; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
774 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
775 ; CHECK-NEXT: li a0, 257
776 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
777 ; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
780 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i16:
781 ; CHECK-ZVBB: # %bb.0:
782 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
783 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
784 ; CHECK-ZVBB-NEXT: ret
785 %v = call <vscale x 32 x i16> @llvm.vp.ctpop.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
786 ret <vscale x 32 x i16> %v
789 define <vscale x 32 x i16> @vp_ctpop_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
790 ; CHECK-LABEL: vp_ctpop_nxv32i16_unmasked:
792 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
793 ; CHECK-NEXT: vsrl.vi v16, v8, 1
794 ; CHECK-NEXT: lui a0, 5
795 ; CHECK-NEXT: addi a0, a0, 1365
796 ; CHECK-NEXT: vand.vx v16, v16, a0
797 ; CHECK-NEXT: lui a0, 3
798 ; CHECK-NEXT: addi a0, a0, 819
799 ; CHECK-NEXT: vsub.vv v8, v8, v16
800 ; CHECK-NEXT: vand.vx v16, v8, a0
801 ; CHECK-NEXT: vsrl.vi v8, v8, 2
802 ; CHECK-NEXT: vand.vx v8, v8, a0
803 ; CHECK-NEXT: lui a0, 1
804 ; CHECK-NEXT: addi a0, a0, -241
805 ; CHECK-NEXT: vadd.vv v8, v16, v8
806 ; CHECK-NEXT: vsrl.vi v16, v8, 4
807 ; CHECK-NEXT: vadd.vv v8, v8, v16
808 ; CHECK-NEXT: vand.vx v8, v8, a0
809 ; CHECK-NEXT: li a0, 257
810 ; CHECK-NEXT: vmul.vx v8, v8, a0
811 ; CHECK-NEXT: vsrl.vi v8, v8, 8
814 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i16_unmasked:
815 ; CHECK-ZVBB: # %bb.0:
816 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
817 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
818 ; CHECK-ZVBB-NEXT: ret
819 %v = call <vscale x 32 x i16> @llvm.vp.ctpop.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
820 ret <vscale x 32 x i16> %v
; External declaration of the masked VP population-count intrinsic for <vscale x 1 x i32>.
823 declare <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
825 define <vscale x 1 x i32> @vp_ctpop_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
826 ; CHECK-LABEL: vp_ctpop_nxv1i32:
828 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
829 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
830 ; CHECK-NEXT: lui a0, 349525
831 ; CHECK-NEXT: addi a0, a0, 1365
832 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
833 ; CHECK-NEXT: lui a0, 209715
834 ; CHECK-NEXT: addi a0, a0, 819
835 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
836 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
837 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
838 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
839 ; CHECK-NEXT: lui a0, 61681
840 ; CHECK-NEXT: addi a0, a0, -241
841 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
842 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
843 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
844 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
845 ; CHECK-NEXT: lui a0, 4112
846 ; CHECK-NEXT: addi a0, a0, 257
847 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
848 ; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
851 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i32:
852 ; CHECK-ZVBB: # %bb.0:
853 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
854 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
855 ; CHECK-ZVBB-NEXT: ret
856 %v = call <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl)
857 ret <vscale x 1 x i32> %v
860 define <vscale x 1 x i32> @vp_ctpop_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
861 ; CHECK-LABEL: vp_ctpop_nxv1i32_unmasked:
863 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
864 ; CHECK-NEXT: vsrl.vi v9, v8, 1
865 ; CHECK-NEXT: lui a0, 349525
866 ; CHECK-NEXT: addi a0, a0, 1365
867 ; CHECK-NEXT: vand.vx v9, v9, a0
868 ; CHECK-NEXT: lui a0, 209715
869 ; CHECK-NEXT: addi a0, a0, 819
870 ; CHECK-NEXT: vsub.vv v8, v8, v9
871 ; CHECK-NEXT: vand.vx v9, v8, a0
872 ; CHECK-NEXT: vsrl.vi v8, v8, 2
873 ; CHECK-NEXT: vand.vx v8, v8, a0
874 ; CHECK-NEXT: lui a0, 61681
875 ; CHECK-NEXT: addi a0, a0, -241
876 ; CHECK-NEXT: vadd.vv v8, v9, v8
877 ; CHECK-NEXT: vsrl.vi v9, v8, 4
878 ; CHECK-NEXT: vadd.vv v8, v8, v9
879 ; CHECK-NEXT: vand.vx v8, v8, a0
880 ; CHECK-NEXT: lui a0, 4112
881 ; CHECK-NEXT: addi a0, a0, 257
882 ; CHECK-NEXT: vmul.vx v8, v8, a0
883 ; CHECK-NEXT: vsrl.vi v8, v8, 24
886 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i32_unmasked:
887 ; CHECK-ZVBB: # %bb.0:
888 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
889 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
890 ; CHECK-ZVBB-NEXT: ret
891 %v = call <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
892 ret <vscale x 1 x i32> %v
895 declare <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
897 define <vscale x 2 x i32> @vp_ctpop_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
898 ; CHECK-LABEL: vp_ctpop_nxv2i32:
900 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
901 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
902 ; CHECK-NEXT: lui a0, 349525
903 ; CHECK-NEXT: addi a0, a0, 1365
904 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
905 ; CHECK-NEXT: lui a0, 209715
906 ; CHECK-NEXT: addi a0, a0, 819
907 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
908 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
909 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
910 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
911 ; CHECK-NEXT: lui a0, 61681
912 ; CHECK-NEXT: addi a0, a0, -241
913 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
914 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
915 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
916 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
917 ; CHECK-NEXT: lui a0, 4112
918 ; CHECK-NEXT: addi a0, a0, 257
919 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
920 ; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
923 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i32:
924 ; CHECK-ZVBB: # %bb.0:
925 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
926 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
927 ; CHECK-ZVBB-NEXT: ret
928 %v = call <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
929 ret <vscale x 2 x i32> %v
932 define <vscale x 2 x i32> @vp_ctpop_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
933 ; CHECK-LABEL: vp_ctpop_nxv2i32_unmasked:
935 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
936 ; CHECK-NEXT: vsrl.vi v9, v8, 1
937 ; CHECK-NEXT: lui a0, 349525
938 ; CHECK-NEXT: addi a0, a0, 1365
939 ; CHECK-NEXT: vand.vx v9, v9, a0
940 ; CHECK-NEXT: lui a0, 209715
941 ; CHECK-NEXT: addi a0, a0, 819
942 ; CHECK-NEXT: vsub.vv v8, v8, v9
943 ; CHECK-NEXT: vand.vx v9, v8, a0
944 ; CHECK-NEXT: vsrl.vi v8, v8, 2
945 ; CHECK-NEXT: vand.vx v8, v8, a0
946 ; CHECK-NEXT: lui a0, 61681
947 ; CHECK-NEXT: addi a0, a0, -241
948 ; CHECK-NEXT: vadd.vv v8, v9, v8
949 ; CHECK-NEXT: vsrl.vi v9, v8, 4
950 ; CHECK-NEXT: vadd.vv v8, v8, v9
951 ; CHECK-NEXT: vand.vx v8, v8, a0
952 ; CHECK-NEXT: lui a0, 4112
953 ; CHECK-NEXT: addi a0, a0, 257
954 ; CHECK-NEXT: vmul.vx v8, v8, a0
955 ; CHECK-NEXT: vsrl.vi v8, v8, 24
958 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i32_unmasked:
959 ; CHECK-ZVBB: # %bb.0:
960 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
961 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
962 ; CHECK-ZVBB-NEXT: ret
963 %v = call <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
964 ret <vscale x 2 x i32> %v
967 declare <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
969 define <vscale x 4 x i32> @vp_ctpop_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
970 ; CHECK-LABEL: vp_ctpop_nxv4i32:
972 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
973 ; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
974 ; CHECK-NEXT: lui a0, 349525
975 ; CHECK-NEXT: addi a0, a0, 1365
976 ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
977 ; CHECK-NEXT: lui a0, 209715
978 ; CHECK-NEXT: addi a0, a0, 819
979 ; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
980 ; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
981 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
982 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
983 ; CHECK-NEXT: lui a0, 61681
984 ; CHECK-NEXT: addi a0, a0, -241
985 ; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
986 ; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
987 ; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
988 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
989 ; CHECK-NEXT: lui a0, 4112
990 ; CHECK-NEXT: addi a0, a0, 257
991 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
992 ; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
995 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i32:
996 ; CHECK-ZVBB: # %bb.0:
997 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma
998 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
999 ; CHECK-ZVBB-NEXT: ret
1000 %v = call <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
1001 ret <vscale x 4 x i32> %v
1004 define <vscale x 4 x i32> @vp_ctpop_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
1005 ; CHECK-LABEL: vp_ctpop_nxv4i32_unmasked:
1007 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1008 ; CHECK-NEXT: vsrl.vi v10, v8, 1
1009 ; CHECK-NEXT: lui a0, 349525
1010 ; CHECK-NEXT: addi a0, a0, 1365
1011 ; CHECK-NEXT: vand.vx v10, v10, a0
1012 ; CHECK-NEXT: lui a0, 209715
1013 ; CHECK-NEXT: addi a0, a0, 819
1014 ; CHECK-NEXT: vsub.vv v8, v8, v10
1015 ; CHECK-NEXT: vand.vx v10, v8, a0
1016 ; CHECK-NEXT: vsrl.vi v8, v8, 2
1017 ; CHECK-NEXT: vand.vx v8, v8, a0
1018 ; CHECK-NEXT: lui a0, 61681
1019 ; CHECK-NEXT: addi a0, a0, -241
1020 ; CHECK-NEXT: vadd.vv v8, v10, v8
1021 ; CHECK-NEXT: vsrl.vi v10, v8, 4
1022 ; CHECK-NEXT: vadd.vv v8, v8, v10
1023 ; CHECK-NEXT: vand.vx v8, v8, a0
1024 ; CHECK-NEXT: lui a0, 4112
1025 ; CHECK-NEXT: addi a0, a0, 257
1026 ; CHECK-NEXT: vmul.vx v8, v8, a0
1027 ; CHECK-NEXT: vsrl.vi v8, v8, 24
1030 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i32_unmasked:
1031 ; CHECK-ZVBB: # %bb.0:
1032 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1033 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
1034 ; CHECK-ZVBB-NEXT: ret
1035 %v = call <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1036 ret <vscale x 4 x i32> %v
1039 declare <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
1041 define <vscale x 8 x i32> @vp_ctpop_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1042 ; CHECK-LABEL: vp_ctpop_nxv8i32:
1044 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
1045 ; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
1046 ; CHECK-NEXT: lui a0, 349525
1047 ; CHECK-NEXT: addi a0, a0, 1365
1048 ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
1049 ; CHECK-NEXT: lui a0, 209715
1050 ; CHECK-NEXT: addi a0, a0, 819
1051 ; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
1052 ; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
1053 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
1054 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1055 ; CHECK-NEXT: lui a0, 61681
1056 ; CHECK-NEXT: addi a0, a0, -241
1057 ; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t
1058 ; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
1059 ; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
1060 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1061 ; CHECK-NEXT: lui a0, 4112
1062 ; CHECK-NEXT: addi a0, a0, 257
1063 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
1064 ; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
1067 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i32:
1068 ; CHECK-ZVBB: # %bb.0:
1069 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma
1070 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
1071 ; CHECK-ZVBB-NEXT: ret
1072 %v = call <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
1073 ret <vscale x 8 x i32> %v
1076 define <vscale x 8 x i32> @vp_ctpop_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
1077 ; CHECK-LABEL: vp_ctpop_nxv8i32_unmasked:
1079 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
1080 ; CHECK-NEXT: vsrl.vi v12, v8, 1
1081 ; CHECK-NEXT: lui a0, 349525
1082 ; CHECK-NEXT: addi a0, a0, 1365
1083 ; CHECK-NEXT: vand.vx v12, v12, a0
1084 ; CHECK-NEXT: lui a0, 209715
1085 ; CHECK-NEXT: addi a0, a0, 819
1086 ; CHECK-NEXT: vsub.vv v8, v8, v12
1087 ; CHECK-NEXT: vand.vx v12, v8, a0
1088 ; CHECK-NEXT: vsrl.vi v8, v8, 2
1089 ; CHECK-NEXT: vand.vx v8, v8, a0
1090 ; CHECK-NEXT: lui a0, 61681
1091 ; CHECK-NEXT: addi a0, a0, -241
1092 ; CHECK-NEXT: vadd.vv v8, v12, v8
1093 ; CHECK-NEXT: vsrl.vi v12, v8, 4
1094 ; CHECK-NEXT: vadd.vv v8, v8, v12
1095 ; CHECK-NEXT: vand.vx v8, v8, a0
1096 ; CHECK-NEXT: lui a0, 4112
1097 ; CHECK-NEXT: addi a0, a0, 257
1098 ; CHECK-NEXT: vmul.vx v8, v8, a0
1099 ; CHECK-NEXT: vsrl.vi v8, v8, 24
1102 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i32_unmasked:
1103 ; CHECK-ZVBB: # %bb.0:
1104 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma
1105 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
1106 ; CHECK-ZVBB-NEXT: ret
1107 %v = call <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
1108 ret <vscale x 8 x i32> %v
1111 declare <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
1113 define <vscale x 16 x i32> @vp_ctpop_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1114 ; CHECK-LABEL: vp_ctpop_nxv16i32:
1116 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1117 ; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
1118 ; CHECK-NEXT: lui a0, 349525
1119 ; CHECK-NEXT: addi a0, a0, 1365
1120 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
1121 ; CHECK-NEXT: lui a0, 209715
1122 ; CHECK-NEXT: addi a0, a0, 819
1123 ; CHECK-NEXT: vsub.vv v16, v8, v16, v0.t
1124 ; CHECK-NEXT: vand.vx v8, v16, a0, v0.t
1125 ; CHECK-NEXT: vsrl.vi v16, v16, 2, v0.t
1126 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
1127 ; CHECK-NEXT: lui a0, 61681
1128 ; CHECK-NEXT: addi a0, a0, -241
1129 ; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
1130 ; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
1131 ; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
1132 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1133 ; CHECK-NEXT: lui a0, 4112
1134 ; CHECK-NEXT: addi a0, a0, 257
1135 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
1136 ; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
1139 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i32:
1140 ; CHECK-ZVBB: # %bb.0:
1141 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1142 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
1143 ; CHECK-ZVBB-NEXT: ret
1144 %v = call <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
1145 ret <vscale x 16 x i32> %v
1148 define <vscale x 16 x i32> @vp_ctpop_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
1149 ; CHECK-LABEL: vp_ctpop_nxv16i32_unmasked:
1151 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1152 ; CHECK-NEXT: vsrl.vi v16, v8, 1
1153 ; CHECK-NEXT: lui a0, 349525
1154 ; CHECK-NEXT: addi a0, a0, 1365
1155 ; CHECK-NEXT: vand.vx v16, v16, a0
1156 ; CHECK-NEXT: lui a0, 209715
1157 ; CHECK-NEXT: addi a0, a0, 819
1158 ; CHECK-NEXT: vsub.vv v8, v8, v16
1159 ; CHECK-NEXT: vand.vx v16, v8, a0
1160 ; CHECK-NEXT: vsrl.vi v8, v8, 2
1161 ; CHECK-NEXT: vand.vx v8, v8, a0
1162 ; CHECK-NEXT: lui a0, 61681
1163 ; CHECK-NEXT: addi a0, a0, -241
1164 ; CHECK-NEXT: vadd.vv v8, v16, v8
1165 ; CHECK-NEXT: vsrl.vi v16, v8, 4
1166 ; CHECK-NEXT: vadd.vv v8, v8, v16
1167 ; CHECK-NEXT: vand.vx v8, v8, a0
1168 ; CHECK-NEXT: lui a0, 4112
1169 ; CHECK-NEXT: addi a0, a0, 257
1170 ; CHECK-NEXT: vmul.vx v8, v8, a0
1171 ; CHECK-NEXT: vsrl.vi v8, v8, 24
1174 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i32_unmasked:
1175 ; CHECK-ZVBB: # %bb.0:
1176 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1177 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
1178 ; CHECK-ZVBB-NEXT: ret
1179 %v = call <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
1180 ret <vscale x 16 x i32> %v
1183 declare <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
1185 define <vscale x 1 x i64> @vp_ctpop_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1186 ; RV32-LABEL: vp_ctpop_nxv1i64:
1188 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1189 ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
1190 ; RV32-NEXT: lui a1, 349525
1191 ; RV32-NEXT: addi a1, a1, 1365
1192 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
1193 ; RV32-NEXT: vmv.v.x v10, a1
1194 ; RV32-NEXT: lui a1, 209715
1195 ; RV32-NEXT: addi a1, a1, 819
1196 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1197 ; RV32-NEXT: vand.vv v9, v9, v10, v0.t
1198 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
1199 ; RV32-NEXT: vmv.v.x v10, a1
1200 ; RV32-NEXT: lui a1, 61681
1201 ; RV32-NEXT: addi a1, a1, -241
1202 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1203 ; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
1204 ; RV32-NEXT: vand.vv v9, v8, v10, v0.t
1205 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
1206 ; RV32-NEXT: vand.vv v8, v8, v10, v0.t
1207 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
1208 ; RV32-NEXT: vmv.v.x v10, a1
1209 ; RV32-NEXT: lui a1, 4112
1210 ; RV32-NEXT: addi a1, a1, 257
1211 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1212 ; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
1213 ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
1214 ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
1215 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
1216 ; RV32-NEXT: vmv.v.x v9, a1
1217 ; RV32-NEXT: li a1, 56
1218 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1219 ; RV32-NEXT: vand.vv v8, v8, v10, v0.t
1220 ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t
1221 ; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t
1224 ; RV64-LABEL: vp_ctpop_nxv1i64:
1226 ; RV64-NEXT: lui a1, 349525
1227 ; RV64-NEXT: lui a2, 209715
1228 ; RV64-NEXT: lui a3, 61681
1229 ; RV64-NEXT: lui a4, 4112
1230 ; RV64-NEXT: addiw a1, a1, 1365
1231 ; RV64-NEXT: addiw a2, a2, 819
1232 ; RV64-NEXT: addiw a3, a3, -241
1233 ; RV64-NEXT: addiw a4, a4, 257
1234 ; RV64-NEXT: slli a5, a1, 32
1235 ; RV64-NEXT: add a1, a1, a5
1236 ; RV64-NEXT: slli a5, a2, 32
1237 ; RV64-NEXT: add a2, a2, a5
1238 ; RV64-NEXT: slli a5, a3, 32
1239 ; RV64-NEXT: add a3, a3, a5
1240 ; RV64-NEXT: slli a5, a4, 32
1241 ; RV64-NEXT: add a4, a4, a5
1242 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1243 ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
1244 ; RV64-NEXT: vand.vx v9, v9, a1, v0.t
1245 ; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
1246 ; RV64-NEXT: vand.vx v9, v8, a2, v0.t
1247 ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
1248 ; RV64-NEXT: vand.vx v8, v8, a2, v0.t
1249 ; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
1250 ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
1251 ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
1252 ; RV64-NEXT: vand.vx v8, v8, a3, v0.t
1253 ; RV64-NEXT: li a0, 56
1254 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t
1255 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
1258 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i64:
1259 ; CHECK-ZVBB: # %bb.0:
1260 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1261 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
1262 ; CHECK-ZVBB-NEXT: ret
1263 %v = call <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl)
1264 ret <vscale x 1 x i64> %v
1267 define <vscale x 1 x i64> @vp_ctpop_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
1268 ; RV32-LABEL: vp_ctpop_nxv1i64_unmasked:
1270 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1271 ; RV32-NEXT: vsrl.vi v9, v8, 1
1272 ; RV32-NEXT: lui a1, 349525
1273 ; RV32-NEXT: addi a1, a1, 1365
1274 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
1275 ; RV32-NEXT: vmv.v.x v10, a1
1276 ; RV32-NEXT: lui a1, 209715
1277 ; RV32-NEXT: addi a1, a1, 819
1278 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1279 ; RV32-NEXT: vand.vv v9, v9, v10
1280 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
1281 ; RV32-NEXT: vmv.v.x v10, a1
1282 ; RV32-NEXT: lui a1, 61681
1283 ; RV32-NEXT: addi a1, a1, -241
1284 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1285 ; RV32-NEXT: vsub.vv v8, v8, v9
1286 ; RV32-NEXT: vand.vv v9, v8, v10
1287 ; RV32-NEXT: vsrl.vi v8, v8, 2
1288 ; RV32-NEXT: vand.vv v8, v8, v10
1289 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
1290 ; RV32-NEXT: vmv.v.x v10, a1
1291 ; RV32-NEXT: lui a1, 4112
1292 ; RV32-NEXT: addi a1, a1, 257
1293 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1294 ; RV32-NEXT: vadd.vv v8, v9, v8
1295 ; RV32-NEXT: vsrl.vi v9, v8, 4
1296 ; RV32-NEXT: vadd.vv v8, v8, v9
1297 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
1298 ; RV32-NEXT: vmv.v.x v9, a1
1299 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1300 ; RV32-NEXT: vand.vv v8, v8, v10
1301 ; RV32-NEXT: vmul.vv v8, v8, v9
1302 ; RV32-NEXT: li a0, 56
1303 ; RV32-NEXT: vsrl.vx v8, v8, a0
1306 ; RV64-LABEL: vp_ctpop_nxv1i64_unmasked:
1308 ; RV64-NEXT: lui a1, 349525
1309 ; RV64-NEXT: lui a2, 209715
1310 ; RV64-NEXT: lui a3, 61681
1311 ; RV64-NEXT: lui a4, 4112
1312 ; RV64-NEXT: addiw a1, a1, 1365
1313 ; RV64-NEXT: addiw a2, a2, 819
1314 ; RV64-NEXT: addiw a3, a3, -241
1315 ; RV64-NEXT: addiw a4, a4, 257
1316 ; RV64-NEXT: slli a5, a1, 32
1317 ; RV64-NEXT: add a1, a1, a5
1318 ; RV64-NEXT: slli a5, a2, 32
1319 ; RV64-NEXT: add a2, a2, a5
1320 ; RV64-NEXT: slli a5, a3, 32
1321 ; RV64-NEXT: add a3, a3, a5
1322 ; RV64-NEXT: slli a5, a4, 32
1323 ; RV64-NEXT: add a4, a4, a5
1324 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1325 ; RV64-NEXT: vsrl.vi v9, v8, 1
1326 ; RV64-NEXT: vand.vx v9, v9, a1
1327 ; RV64-NEXT: vsub.vv v8, v8, v9
1328 ; RV64-NEXT: vand.vx v9, v8, a2
1329 ; RV64-NEXT: vsrl.vi v8, v8, 2
1330 ; RV64-NEXT: vand.vx v8, v8, a2
1331 ; RV64-NEXT: vadd.vv v8, v9, v8
1332 ; RV64-NEXT: vsrl.vi v9, v8, 4
1333 ; RV64-NEXT: vadd.vv v8, v8, v9
1334 ; RV64-NEXT: vand.vx v8, v8, a3
1335 ; RV64-NEXT: vmul.vx v8, v8, a4
1336 ; RV64-NEXT: li a0, 56
1337 ; RV64-NEXT: vsrl.vx v8, v8, a0
1340 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i64_unmasked:
1341 ; CHECK-ZVBB: # %bb.0:
1342 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1343 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
1344 ; CHECK-ZVBB-NEXT: ret
1345 %v = call <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
1346 ret <vscale x 1 x i64> %v
1349 declare <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1351 define <vscale x 2 x i64> @vp_ctpop_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1352 ; RV32-LABEL: vp_ctpop_nxv2i64:
1354 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1355 ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
1356 ; RV32-NEXT: lui a1, 349525
1357 ; RV32-NEXT: addi a1, a1, 1365
1358 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
1359 ; RV32-NEXT: vmv.v.x v12, a1
1360 ; RV32-NEXT: lui a1, 209715
1361 ; RV32-NEXT: addi a1, a1, 819
1362 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1363 ; RV32-NEXT: vand.vv v10, v10, v12, v0.t
1364 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
1365 ; RV32-NEXT: vmv.v.x v12, a1
1366 ; RV32-NEXT: lui a1, 61681
1367 ; RV32-NEXT: addi a1, a1, -241
1368 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1369 ; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
1370 ; RV32-NEXT: vand.vv v10, v8, v12, v0.t
1371 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
1372 ; RV32-NEXT: vand.vv v8, v8, v12, v0.t
1373 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
1374 ; RV32-NEXT: vmv.v.x v12, a1
1375 ; RV32-NEXT: lui a1, 4112
1376 ; RV32-NEXT: addi a1, a1, 257
1377 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1378 ; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
1379 ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
1380 ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
1381 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
1382 ; RV32-NEXT: vmv.v.x v10, a1
1383 ; RV32-NEXT: li a1, 56
1384 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1385 ; RV32-NEXT: vand.vv v8, v8, v12, v0.t
1386 ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t
1387 ; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t
1390 ; RV64-LABEL: vp_ctpop_nxv2i64:
1392 ; RV64-NEXT: lui a1, 349525
1393 ; RV64-NEXT: lui a2, 209715
1394 ; RV64-NEXT: lui a3, 61681
1395 ; RV64-NEXT: lui a4, 4112
1396 ; RV64-NEXT: addiw a1, a1, 1365
1397 ; RV64-NEXT: addiw a2, a2, 819
1398 ; RV64-NEXT: addiw a3, a3, -241
1399 ; RV64-NEXT: addiw a4, a4, 257
1400 ; RV64-NEXT: slli a5, a1, 32
1401 ; RV64-NEXT: add a1, a1, a5
1402 ; RV64-NEXT: slli a5, a2, 32
1403 ; RV64-NEXT: add a2, a2, a5
1404 ; RV64-NEXT: slli a5, a3, 32
1405 ; RV64-NEXT: add a3, a3, a5
1406 ; RV64-NEXT: slli a5, a4, 32
1407 ; RV64-NEXT: add a4, a4, a5
1408 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1409 ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
1410 ; RV64-NEXT: vand.vx v10, v10, a1, v0.t
1411 ; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
1412 ; RV64-NEXT: vand.vx v10, v8, a2, v0.t
1413 ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
1414 ; RV64-NEXT: vand.vx v8, v8, a2, v0.t
1415 ; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
1416 ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
1417 ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
1418 ; RV64-NEXT: vand.vx v8, v8, a3, v0.t
1419 ; RV64-NEXT: li a0, 56
1420 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t
1421 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
1424 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i64:
1425 ; CHECK-ZVBB: # %bb.0:
1426 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1427 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
1428 ; CHECK-ZVBB-NEXT: ret
1429 %v = call <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
1430 ret <vscale x 2 x i64> %v
1433 define <vscale x 2 x i64> @vp_ctpop_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
1434 ; RV32-LABEL: vp_ctpop_nxv2i64_unmasked:
1436 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1437 ; RV32-NEXT: vsrl.vi v10, v8, 1
1438 ; RV32-NEXT: lui a1, 349525
1439 ; RV32-NEXT: addi a1, a1, 1365
1440 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
1441 ; RV32-NEXT: vmv.v.x v12, a1
1442 ; RV32-NEXT: lui a1, 209715
1443 ; RV32-NEXT: addi a1, a1, 819
1444 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1445 ; RV32-NEXT: vand.vv v10, v10, v12
1446 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
1447 ; RV32-NEXT: vmv.v.x v12, a1
1448 ; RV32-NEXT: lui a1, 61681
1449 ; RV32-NEXT: addi a1, a1, -241
1450 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1451 ; RV32-NEXT: vsub.vv v8, v8, v10
1452 ; RV32-NEXT: vand.vv v10, v8, v12
1453 ; RV32-NEXT: vsrl.vi v8, v8, 2
1454 ; RV32-NEXT: vand.vv v8, v8, v12
1455 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
1456 ; RV32-NEXT: vmv.v.x v12, a1
1457 ; RV32-NEXT: lui a1, 4112
1458 ; RV32-NEXT: addi a1, a1, 257
1459 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1460 ; RV32-NEXT: vadd.vv v8, v10, v8
1461 ; RV32-NEXT: vsrl.vi v10, v8, 4
1462 ; RV32-NEXT: vadd.vv v8, v8, v10
1463 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
1464 ; RV32-NEXT: vmv.v.x v10, a1
1465 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1466 ; RV32-NEXT: vand.vv v8, v8, v12
1467 ; RV32-NEXT: vmul.vv v8, v8, v10
1468 ; RV32-NEXT: li a0, 56
1469 ; RV32-NEXT: vsrl.vx v8, v8, a0
1472 ; RV64-LABEL: vp_ctpop_nxv2i64_unmasked:
1474 ; RV64-NEXT: lui a1, 349525
1475 ; RV64-NEXT: lui a2, 209715
1476 ; RV64-NEXT: lui a3, 61681
1477 ; RV64-NEXT: lui a4, 4112
1478 ; RV64-NEXT: addiw a1, a1, 1365
1479 ; RV64-NEXT: addiw a2, a2, 819
1480 ; RV64-NEXT: addiw a3, a3, -241
1481 ; RV64-NEXT: addiw a4, a4, 257
1482 ; RV64-NEXT: slli a5, a1, 32
1483 ; RV64-NEXT: add a1, a1, a5
1484 ; RV64-NEXT: slli a5, a2, 32
1485 ; RV64-NEXT: add a2, a2, a5
1486 ; RV64-NEXT: slli a5, a3, 32
1487 ; RV64-NEXT: add a3, a3, a5
1488 ; RV64-NEXT: slli a5, a4, 32
1489 ; RV64-NEXT: add a4, a4, a5
1490 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1491 ; RV64-NEXT: vsrl.vi v10, v8, 1
1492 ; RV64-NEXT: vand.vx v10, v10, a1
1493 ; RV64-NEXT: vsub.vv v8, v8, v10
1494 ; RV64-NEXT: vand.vx v10, v8, a2
1495 ; RV64-NEXT: vsrl.vi v8, v8, 2
1496 ; RV64-NEXT: vand.vx v8, v8, a2
1497 ; RV64-NEXT: vadd.vv v8, v10, v8
1498 ; RV64-NEXT: vsrl.vi v10, v8, 4
1499 ; RV64-NEXT: vadd.vv v8, v8, v10
1500 ; RV64-NEXT: vand.vx v8, v8, a3
1501 ; RV64-NEXT: vmul.vx v8, v8, a4
1502 ; RV64-NEXT: li a0, 56
1503 ; RV64-NEXT: vsrl.vx v8, v8, a0
1506 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i64_unmasked:
1507 ; CHECK-ZVBB: # %bb.0:
1508 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1509 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
1510 ; CHECK-ZVBB-NEXT: ret
1511 %v = call <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
1512 ret <vscale x 2 x i64> %v
1515 declare <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
1517 define <vscale x 4 x i64> @vp_ctpop_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1518 ; RV32-LABEL: vp_ctpop_nxv4i64:
1520 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1521 ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
1522 ; RV32-NEXT: lui a1, 349525
1523 ; RV32-NEXT: addi a1, a1, 1365
1524 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1525 ; RV32-NEXT: vmv.v.x v16, a1
1526 ; RV32-NEXT: lui a1, 209715
1527 ; RV32-NEXT: addi a1, a1, 819
1528 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1529 ; RV32-NEXT: vand.vv v16, v12, v16, v0.t
1530 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1531 ; RV32-NEXT: vmv.v.x v12, a1
1532 ; RV32-NEXT: lui a1, 61681
1533 ; RV32-NEXT: addi a1, a1, -241
1534 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1535 ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
1536 ; RV32-NEXT: vand.vv v16, v8, v12, v0.t
1537 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
1538 ; RV32-NEXT: vand.vv v8, v8, v12, v0.t
1539 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1540 ; RV32-NEXT: vmv.v.x v12, a1
1541 ; RV32-NEXT: lui a1, 4112
1542 ; RV32-NEXT: addi a1, a1, 257
1543 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1544 ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
1545 ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
1546 ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
1547 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1548 ; RV32-NEXT: vmv.v.x v16, a1
1549 ; RV32-NEXT: li a1, 56
1550 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1551 ; RV32-NEXT: vand.vv v8, v8, v12, v0.t
1552 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
1553 ; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t
1556 ; RV64-LABEL: vp_ctpop_nxv4i64:
1558 ; RV64-NEXT: lui a1, 349525
1559 ; RV64-NEXT: lui a2, 209715
1560 ; RV64-NEXT: lui a3, 61681
1561 ; RV64-NEXT: lui a4, 4112
1562 ; RV64-NEXT: addiw a1, a1, 1365
1563 ; RV64-NEXT: addiw a2, a2, 819
1564 ; RV64-NEXT: addiw a3, a3, -241
1565 ; RV64-NEXT: addiw a4, a4, 257
1566 ; RV64-NEXT: slli a5, a1, 32
1567 ; RV64-NEXT: add a1, a1, a5
1568 ; RV64-NEXT: slli a5, a2, 32
1569 ; RV64-NEXT: add a2, a2, a5
1570 ; RV64-NEXT: slli a5, a3, 32
1571 ; RV64-NEXT: add a3, a3, a5
1572 ; RV64-NEXT: slli a5, a4, 32
1573 ; RV64-NEXT: add a4, a4, a5
1574 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1575 ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
1576 ; RV64-NEXT: vand.vx v12, v12, a1, v0.t
1577 ; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
1578 ; RV64-NEXT: vand.vx v12, v8, a2, v0.t
1579 ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
1580 ; RV64-NEXT: vand.vx v8, v8, a2, v0.t
1581 ; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
1582 ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
1583 ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
1584 ; RV64-NEXT: vand.vx v8, v8, a3, v0.t
1585 ; RV64-NEXT: li a0, 56
1586 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t
1587 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
1590 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i64:
1591 ; CHECK-ZVBB: # %bb.0:
1592 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1593 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
1594 ; CHECK-ZVBB-NEXT: ret
1595 %v = call <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl)
1596 ret <vscale x 4 x i64> %v
1599 define <vscale x 4 x i64> @vp_ctpop_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
1600 ; RV32-LABEL: vp_ctpop_nxv4i64_unmasked:
1602 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1603 ; RV32-NEXT: vsrl.vi v12, v8, 1
1604 ; RV32-NEXT: lui a1, 349525
1605 ; RV32-NEXT: addi a1, a1, 1365
1606 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1607 ; RV32-NEXT: vmv.v.x v16, a1
1608 ; RV32-NEXT: lui a1, 209715
1609 ; RV32-NEXT: addi a1, a1, 819
1610 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1611 ; RV32-NEXT: vand.vv v12, v12, v16
1612 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1613 ; RV32-NEXT: vmv.v.x v16, a1
1614 ; RV32-NEXT: lui a1, 61681
1615 ; RV32-NEXT: addi a1, a1, -241
1616 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1617 ; RV32-NEXT: vsub.vv v8, v8, v12
1618 ; RV32-NEXT: vand.vv v12, v8, v16
1619 ; RV32-NEXT: vsrl.vi v8, v8, 2
1620 ; RV32-NEXT: vand.vv v8, v8, v16
1621 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1622 ; RV32-NEXT: vmv.v.x v16, a1
1623 ; RV32-NEXT: lui a1, 4112
1624 ; RV32-NEXT: addi a1, a1, 257
1625 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1626 ; RV32-NEXT: vadd.vv v8, v12, v8
1627 ; RV32-NEXT: vsrl.vi v12, v8, 4
1628 ; RV32-NEXT: vadd.vv v8, v8, v12
1629 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1630 ; RV32-NEXT: vmv.v.x v12, a1
1631 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1632 ; RV32-NEXT: vand.vv v8, v8, v16
1633 ; RV32-NEXT: vmul.vv v8, v8, v12
1634 ; RV32-NEXT: li a0, 56
1635 ; RV32-NEXT: vsrl.vx v8, v8, a0
1638 ; RV64-LABEL: vp_ctpop_nxv4i64_unmasked:
1640 ; RV64-NEXT: lui a1, 349525
1641 ; RV64-NEXT: lui a2, 209715
1642 ; RV64-NEXT: lui a3, 61681
1643 ; RV64-NEXT: lui a4, 4112
1644 ; RV64-NEXT: addiw a1, a1, 1365
1645 ; RV64-NEXT: addiw a2, a2, 819
1646 ; RV64-NEXT: addiw a3, a3, -241
1647 ; RV64-NEXT: addiw a4, a4, 257
1648 ; RV64-NEXT: slli a5, a1, 32
1649 ; RV64-NEXT: add a1, a1, a5
1650 ; RV64-NEXT: slli a5, a2, 32
1651 ; RV64-NEXT: add a2, a2, a5
1652 ; RV64-NEXT: slli a5, a3, 32
1653 ; RV64-NEXT: add a3, a3, a5
1654 ; RV64-NEXT: slli a5, a4, 32
1655 ; RV64-NEXT: add a4, a4, a5
1656 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1657 ; RV64-NEXT: vsrl.vi v12, v8, 1
1658 ; RV64-NEXT: vand.vx v12, v12, a1
1659 ; RV64-NEXT: vsub.vv v8, v8, v12
1660 ; RV64-NEXT: vand.vx v12, v8, a2
1661 ; RV64-NEXT: vsrl.vi v8, v8, 2
1662 ; RV64-NEXT: vand.vx v8, v8, a2
1663 ; RV64-NEXT: vadd.vv v8, v12, v8
1664 ; RV64-NEXT: vsrl.vi v12, v8, 4
1665 ; RV64-NEXT: vadd.vv v8, v8, v12
1666 ; RV64-NEXT: vand.vx v8, v8, a3
1667 ; RV64-NEXT: vmul.vx v8, v8, a4
1668 ; RV64-NEXT: li a0, 56
1669 ; RV64-NEXT: vsrl.vx v8, v8, a0
1672 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i64_unmasked:
1673 ; CHECK-ZVBB: # %bb.0:
1674 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1675 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
1676 ; CHECK-ZVBB-NEXT: ret
1677 %v = call <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1678 ret <vscale x 4 x i64> %v
1681 declare <vscale x 7 x i64> @llvm.vp.ctpop.nxv7i64(<vscale x 7 x i64>, <vscale x 7 x i1>, i32)
; Masked VP popcount on <vscale x 7 x i64> (a non-power-of-2 element count,
; widened to an m8 register group). Without Zvbb the backend emits the
; classic parallel bit-count: shift/mask with 0x5555…, 0x3333…, 0x0F0F…
; constants, then multiply by 0x0101…01 and shift right by 56. RV32 must
; splat the 64-bit constants via e32 vmv.v.x (no 64-bit scalar regs); RV64
; materializes them with lui/addiw/slli/add. With Zvbb this is a single
; masked vcpop.v.
; NOTE(review): CHECK lines are autogenerated by update_llc_test_checks.py;
; regenerate them rather than hand-editing.
1683 define <vscale x 7 x i64> @vp_ctpop_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
1684 ; RV32-LABEL: vp_ctpop_nxv7i64:
1686 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1687 ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
1688 ; RV32-NEXT: lui a1, 349525
1689 ; RV32-NEXT: addi a1, a1, 1365
1690 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
1691 ; RV32-NEXT: vmv.v.x v24, a1
1692 ; RV32-NEXT: lui a1, 209715
1693 ; RV32-NEXT: addi a1, a1, 819
1694 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1695 ; RV32-NEXT: vand.vv v24, v16, v24, v0.t
1696 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
1697 ; RV32-NEXT: vmv.v.x v16, a1
1698 ; RV32-NEXT: lui a1, 61681
1699 ; RV32-NEXT: addi a1, a1, -241
1700 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1701 ; RV32-NEXT: vsub.vv v8, v8, v24, v0.t
1702 ; RV32-NEXT: vand.vv v24, v8, v16, v0.t
1703 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
1704 ; RV32-NEXT: vand.vv v16, v8, v16, v0.t
1705 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
1706 ; RV32-NEXT: vmv.v.x v8, a1
1707 ; RV32-NEXT: lui a1, 4112
1708 ; RV32-NEXT: addi a1, a1, 257
1709 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1710 ; RV32-NEXT: vadd.vv v16, v24, v16, v0.t
1711 ; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t
1712 ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t
1713 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
1714 ; RV32-NEXT: vmv.v.x v24, a1
1715 ; RV32-NEXT: li a1, 56
1716 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1717 ; RV32-NEXT: vand.vv v8, v16, v8, v0.t
1718 ; RV32-NEXT: vmul.vv v8, v8, v24, v0.t
1719 ; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t
1722 ; RV64-LABEL: vp_ctpop_nxv7i64:
1724 ; RV64-NEXT: lui a1, 349525
1725 ; RV64-NEXT: lui a2, 209715
1726 ; RV64-NEXT: lui a3, 61681
1727 ; RV64-NEXT: lui a4, 4112
1728 ; RV64-NEXT: addiw a1, a1, 1365
1729 ; RV64-NEXT: addiw a2, a2, 819
1730 ; RV64-NEXT: addiw a3, a3, -241
1731 ; RV64-NEXT: addiw a4, a4, 257
1732 ; RV64-NEXT: slli a5, a1, 32
1733 ; RV64-NEXT: add a1, a1, a5
1734 ; RV64-NEXT: slli a5, a2, 32
1735 ; RV64-NEXT: add a2, a2, a5
1736 ; RV64-NEXT: slli a5, a3, 32
1737 ; RV64-NEXT: add a3, a3, a5
1738 ; RV64-NEXT: slli a5, a4, 32
1739 ; RV64-NEXT: add a4, a4, a5
1740 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1741 ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
1742 ; RV64-NEXT: vand.vx v16, v16, a1, v0.t
1743 ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
1744 ; RV64-NEXT: vand.vx v16, v8, a2, v0.t
1745 ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
1746 ; RV64-NEXT: vand.vx v8, v8, a2, v0.t
1747 ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
1748 ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
1749 ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
1750 ; RV64-NEXT: vand.vx v8, v8, a3, v0.t
1751 ; RV64-NEXT: li a0, 56
1752 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t
1753 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
1756 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv7i64:
1757 ; CHECK-ZVBB: # %bb.0:
1758 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1759 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
1760 ; CHECK-ZVBB-NEXT: ret
1761 %v = call <vscale x 7 x i64> @llvm.vp.ctpop.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl)
1762 ret <vscale x 7 x i64> %v
; Unmasked variant of vp_ctpop_nxv7i64 (all-true mask via splat (i1 true)):
; identical bit-count sequence but with no ", v0.t" mask operands on the
; vector ops, and a single unmasked vcpop.v under Zvbb.
; NOTE(review): CHECK lines are autogenerated by update_llc_test_checks.py;
; regenerate them rather than hand-editing.
1765 define <vscale x 7 x i64> @vp_ctpop_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
1766 ; RV32-LABEL: vp_ctpop_nxv7i64_unmasked:
1768 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1769 ; RV32-NEXT: vsrl.vi v16, v8, 1
1770 ; RV32-NEXT: lui a1, 349525
1771 ; RV32-NEXT: addi a1, a1, 1365
1772 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
1773 ; RV32-NEXT: vmv.v.x v24, a1
1774 ; RV32-NEXT: lui a1, 209715
1775 ; RV32-NEXT: addi a1, a1, 819
1776 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1777 ; RV32-NEXT: vand.vv v24, v16, v24
1778 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
1779 ; RV32-NEXT: vmv.v.x v16, a1
1780 ; RV32-NEXT: lui a1, 61681
1781 ; RV32-NEXT: addi a1, a1, -241
1782 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1783 ; RV32-NEXT: vsub.vv v8, v8, v24
1784 ; RV32-NEXT: vand.vv v24, v8, v16
1785 ; RV32-NEXT: vsrl.vi v8, v8, 2
1786 ; RV32-NEXT: vand.vv v8, v8, v16
1787 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
1788 ; RV32-NEXT: vmv.v.x v16, a1
1789 ; RV32-NEXT: lui a1, 4112
1790 ; RV32-NEXT: addi a1, a1, 257
1791 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1792 ; RV32-NEXT: vadd.vv v8, v24, v8
1793 ; RV32-NEXT: vsrl.vi v24, v8, 4
1794 ; RV32-NEXT: vadd.vv v8, v8, v24
1795 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
1796 ; RV32-NEXT: vmv.v.x v24, a1
1797 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1798 ; RV32-NEXT: vand.vv v8, v8, v16
1799 ; RV32-NEXT: vmul.vv v8, v8, v24
1800 ; RV32-NEXT: li a0, 56
1801 ; RV32-NEXT: vsrl.vx v8, v8, a0
1804 ; RV64-LABEL: vp_ctpop_nxv7i64_unmasked:
1806 ; RV64-NEXT: lui a1, 349525
1807 ; RV64-NEXT: lui a2, 209715
1808 ; RV64-NEXT: lui a3, 61681
1809 ; RV64-NEXT: lui a4, 4112
1810 ; RV64-NEXT: addiw a1, a1, 1365
1811 ; RV64-NEXT: addiw a2, a2, 819
1812 ; RV64-NEXT: addiw a3, a3, -241
1813 ; RV64-NEXT: addiw a4, a4, 257
1814 ; RV64-NEXT: slli a5, a1, 32
1815 ; RV64-NEXT: add a1, a1, a5
1816 ; RV64-NEXT: slli a5, a2, 32
1817 ; RV64-NEXT: add a2, a2, a5
1818 ; RV64-NEXT: slli a5, a3, 32
1819 ; RV64-NEXT: add a3, a3, a5
1820 ; RV64-NEXT: slli a5, a4, 32
1821 ; RV64-NEXT: add a4, a4, a5
1822 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1823 ; RV64-NEXT: vsrl.vi v16, v8, 1
1824 ; RV64-NEXT: vand.vx v16, v16, a1
1825 ; RV64-NEXT: vsub.vv v8, v8, v16
1826 ; RV64-NEXT: vand.vx v16, v8, a2
1827 ; RV64-NEXT: vsrl.vi v8, v8, 2
1828 ; RV64-NEXT: vand.vx v8, v8, a2
1829 ; RV64-NEXT: vadd.vv v8, v16, v8
1830 ; RV64-NEXT: vsrl.vi v16, v8, 4
1831 ; RV64-NEXT: vadd.vv v8, v8, v16
1832 ; RV64-NEXT: vand.vx v8, v8, a3
1833 ; RV64-NEXT: vmul.vx v8, v8, a4
1834 ; RV64-NEXT: li a0, 56
1835 ; RV64-NEXT: vsrl.vx v8, v8, a0
1838 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv7i64_unmasked:
1839 ; CHECK-ZVBB: # %bb.0:
1840 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1841 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
1842 ; CHECK-ZVBB-NEXT: ret
1843 %v = call <vscale x 7 x i64> @llvm.vp.ctpop.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl)
1844 ret <vscale x 7 x i64> %v
1847 declare <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
; Masked VP popcount on <vscale x 8 x i64> (exactly one m8 register group).
; The expected lowering is the same parallel bit-count as the nxv7i64 case —
; both occupy a full m8 group, so the codegen should match it line-for-line.
; NOTE(review): CHECK lines are autogenerated by update_llc_test_checks.py;
; regenerate them rather than hand-editing.
1849 define <vscale x 8 x i64> @vp_ctpop_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1850 ; RV32-LABEL: vp_ctpop_nxv8i64:
1852 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1853 ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
1854 ; RV32-NEXT: lui a1, 349525
1855 ; RV32-NEXT: addi a1, a1, 1365
1856 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
1857 ; RV32-NEXT: vmv.v.x v24, a1
1858 ; RV32-NEXT: lui a1, 209715
1859 ; RV32-NEXT: addi a1, a1, 819
1860 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1861 ; RV32-NEXT: vand.vv v24, v16, v24, v0.t
1862 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
1863 ; RV32-NEXT: vmv.v.x v16, a1
1864 ; RV32-NEXT: lui a1, 61681
1865 ; RV32-NEXT: addi a1, a1, -241
1866 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1867 ; RV32-NEXT: vsub.vv v8, v8, v24, v0.t
1868 ; RV32-NEXT: vand.vv v24, v8, v16, v0.t
1869 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
1870 ; RV32-NEXT: vand.vv v16, v8, v16, v0.t
1871 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
1872 ; RV32-NEXT: vmv.v.x v8, a1
1873 ; RV32-NEXT: lui a1, 4112
1874 ; RV32-NEXT: addi a1, a1, 257
1875 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1876 ; RV32-NEXT: vadd.vv v16, v24, v16, v0.t
1877 ; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t
1878 ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t
1879 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
1880 ; RV32-NEXT: vmv.v.x v24, a1
1881 ; RV32-NEXT: li a1, 56
1882 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1883 ; RV32-NEXT: vand.vv v8, v16, v8, v0.t
1884 ; RV32-NEXT: vmul.vv v8, v8, v24, v0.t
1885 ; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t
1888 ; RV64-LABEL: vp_ctpop_nxv8i64:
1890 ; RV64-NEXT: lui a1, 349525
1891 ; RV64-NEXT: lui a2, 209715
1892 ; RV64-NEXT: lui a3, 61681
1893 ; RV64-NEXT: lui a4, 4112
1894 ; RV64-NEXT: addiw a1, a1, 1365
1895 ; RV64-NEXT: addiw a2, a2, 819
1896 ; RV64-NEXT: addiw a3, a3, -241
1897 ; RV64-NEXT: addiw a4, a4, 257
1898 ; RV64-NEXT: slli a5, a1, 32
1899 ; RV64-NEXT: add a1, a1, a5
1900 ; RV64-NEXT: slli a5, a2, 32
1901 ; RV64-NEXT: add a2, a2, a5
1902 ; RV64-NEXT: slli a5, a3, 32
1903 ; RV64-NEXT: add a3, a3, a5
1904 ; RV64-NEXT: slli a5, a4, 32
1905 ; RV64-NEXT: add a4, a4, a5
1906 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1907 ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
1908 ; RV64-NEXT: vand.vx v16, v16, a1, v0.t
1909 ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
1910 ; RV64-NEXT: vand.vx v16, v8, a2, v0.t
1911 ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
1912 ; RV64-NEXT: vand.vx v8, v8, a2, v0.t
1913 ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
1914 ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
1915 ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
1916 ; RV64-NEXT: vand.vx v8, v8, a3, v0.t
1917 ; RV64-NEXT: li a0, 56
1918 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t
1919 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
1922 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i64:
1923 ; CHECK-ZVBB: # %bb.0:
1924 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1925 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
1926 ; CHECK-ZVBB-NEXT: ret
1927 %v = call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
1928 ret <vscale x 8 x i64> %v
; Unmasked VP popcount on <vscale x 8 x i64>: same bit-count sequence as the
; masked variant above, minus the ", v0.t" mask operands; single unmasked
; vcpop.v under Zvbb.
; NOTE(review): CHECK lines are autogenerated by update_llc_test_checks.py;
; regenerate them rather than hand-editing.
1931 define <vscale x 8 x i64> @vp_ctpop_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
1932 ; RV32-LABEL: vp_ctpop_nxv8i64_unmasked:
1934 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1935 ; RV32-NEXT: vsrl.vi v16, v8, 1
1936 ; RV32-NEXT: lui a1, 349525
1937 ; RV32-NEXT: addi a1, a1, 1365
1938 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
1939 ; RV32-NEXT: vmv.v.x v24, a1
1940 ; RV32-NEXT: lui a1, 209715
1941 ; RV32-NEXT: addi a1, a1, 819
1942 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1943 ; RV32-NEXT: vand.vv v24, v16, v24
1944 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
1945 ; RV32-NEXT: vmv.v.x v16, a1
1946 ; RV32-NEXT: lui a1, 61681
1947 ; RV32-NEXT: addi a1, a1, -241
1948 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1949 ; RV32-NEXT: vsub.vv v8, v8, v24
1950 ; RV32-NEXT: vand.vv v24, v8, v16
1951 ; RV32-NEXT: vsrl.vi v8, v8, 2
1952 ; RV32-NEXT: vand.vv v8, v8, v16
1953 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
1954 ; RV32-NEXT: vmv.v.x v16, a1
1955 ; RV32-NEXT: lui a1, 4112
1956 ; RV32-NEXT: addi a1, a1, 257
1957 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1958 ; RV32-NEXT: vadd.vv v8, v24, v8
1959 ; RV32-NEXT: vsrl.vi v24, v8, 4
1960 ; RV32-NEXT: vadd.vv v8, v8, v24
1961 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
1962 ; RV32-NEXT: vmv.v.x v24, a1
1963 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1964 ; RV32-NEXT: vand.vv v8, v8, v16
1965 ; RV32-NEXT: vmul.vv v8, v8, v24
1966 ; RV32-NEXT: li a0, 56
1967 ; RV32-NEXT: vsrl.vx v8, v8, a0
1970 ; RV64-LABEL: vp_ctpop_nxv8i64_unmasked:
1972 ; RV64-NEXT: lui a1, 349525
1973 ; RV64-NEXT: lui a2, 209715
1974 ; RV64-NEXT: lui a3, 61681
1975 ; RV64-NEXT: lui a4, 4112
1976 ; RV64-NEXT: addiw a1, a1, 1365
1977 ; RV64-NEXT: addiw a2, a2, 819
1978 ; RV64-NEXT: addiw a3, a3, -241
1979 ; RV64-NEXT: addiw a4, a4, 257
1980 ; RV64-NEXT: slli a5, a1, 32
1981 ; RV64-NEXT: add a1, a1, a5
1982 ; RV64-NEXT: slli a5, a2, 32
1983 ; RV64-NEXT: add a2, a2, a5
1984 ; RV64-NEXT: slli a5, a3, 32
1985 ; RV64-NEXT: add a3, a3, a5
1986 ; RV64-NEXT: slli a5, a4, 32
1987 ; RV64-NEXT: add a4, a4, a5
1988 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1989 ; RV64-NEXT: vsrl.vi v16, v8, 1
1990 ; RV64-NEXT: vand.vx v16, v16, a1
1991 ; RV64-NEXT: vsub.vv v8, v8, v16
1992 ; RV64-NEXT: vand.vx v16, v8, a2
1993 ; RV64-NEXT: vsrl.vi v8, v8, 2
1994 ; RV64-NEXT: vand.vx v8, v8, a2
1995 ; RV64-NEXT: vadd.vv v8, v16, v8
1996 ; RV64-NEXT: vsrl.vi v16, v8, 4
1997 ; RV64-NEXT: vadd.vv v8, v8, v16
1998 ; RV64-NEXT: vand.vx v8, v8, a3
1999 ; RV64-NEXT: vmul.vx v8, v8, a4
2000 ; RV64-NEXT: li a0, 56
2001 ; RV64-NEXT: vsrl.vx v8, v8, a0
2004 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i64_unmasked:
2005 ; CHECK-ZVBB: # %bb.0:
2006 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2007 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
2008 ; CHECK-ZVBB-NEXT: ret
2009 %v = call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
2010 ret <vscale x 8 x i64> %v
2013 declare <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
; Masked VP popcount on <vscale x 16 x i64>: the operand needs two m8
; register groups (v8 and v16), so the lowering splits the EVL into a low
; half (capped at vlenb via bltu/.LBB46_2) and a high half
; (sub/sltu/addi/and computes max(evl - vlenb, 0)), and splits the mask
; with vslidedown.vx. Register pressure forces vs8r.v/vl8r.v spills of
; intermediate register groups to a vlenb-scaled stack area (48*vlenb on
; RV32, 16*vlenb on RV64, per the .cfi_escape expressions). Zvbb still
; needs only two masked vcpop.v ops plus the mask split.
; NOTE(review): CHECK lines are autogenerated by update_llc_test_checks.py;
; regenerate them rather than hand-editing — the spill-slot arithmetic is
; far too order-sensitive to maintain by hand.
2015 define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
2016 ; RV32-LABEL: vp_ctpop_nxv16i64:
2018 ; RV32-NEXT: addi sp, sp, -16
2019 ; RV32-NEXT: .cfi_def_cfa_offset 16
2020 ; RV32-NEXT: csrr a1, vlenb
2021 ; RV32-NEXT: li a2, 48
2022 ; RV32-NEXT: mul a1, a1, a2
2023 ; RV32-NEXT: sub sp, sp, a1
2024 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
2025 ; RV32-NEXT: vmv1r.v v7, v0
2026 ; RV32-NEXT: csrr a1, vlenb
2027 ; RV32-NEXT: li a2, 24
2028 ; RV32-NEXT: mul a1, a1, a2
2029 ; RV32-NEXT: add a1, sp, a1
2030 ; RV32-NEXT: addi a1, a1, 16
2031 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
2032 ; RV32-NEXT: csrr a1, vlenb
2033 ; RV32-NEXT: lui a2, 349525
2034 ; RV32-NEXT: srli a3, a1, 3
2035 ; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
2036 ; RV32-NEXT: vslidedown.vx v0, v0, a3
2037 ; RV32-NEXT: sub a3, a0, a1
2038 ; RV32-NEXT: addi a2, a2, 1365
2039 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2040 ; RV32-NEXT: vmv.v.x v8, a2
2041 ; RV32-NEXT: csrr a2, vlenb
2042 ; RV32-NEXT: slli a2, a2, 5
2043 ; RV32-NEXT: add a2, sp, a2
2044 ; RV32-NEXT: addi a2, a2, 16
2045 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
2046 ; RV32-NEXT: sltu a2, a0, a3
2047 ; RV32-NEXT: addi a2, a2, -1
2048 ; RV32-NEXT: and a2, a2, a3
2049 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2050 ; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t
2051 ; RV32-NEXT: csrr a3, vlenb
2052 ; RV32-NEXT: li a4, 40
2053 ; RV32-NEXT: mul a3, a3, a4
2054 ; RV32-NEXT: add a3, sp, a3
2055 ; RV32-NEXT: addi a3, a3, 16
2056 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
2057 ; RV32-NEXT: csrr a3, vlenb
2058 ; RV32-NEXT: slli a3, a3, 5
2059 ; RV32-NEXT: add a3, sp, a3
2060 ; RV32-NEXT: addi a3, a3, 16
2061 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
2062 ; RV32-NEXT: csrr a3, vlenb
2063 ; RV32-NEXT: li a4, 40
2064 ; RV32-NEXT: mul a3, a3, a4
2065 ; RV32-NEXT: add a3, sp, a3
2066 ; RV32-NEXT: addi a3, a3, 16
2067 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
2068 ; RV32-NEXT: vand.vv v8, v24, v8, v0.t
2069 ; RV32-NEXT: vsub.vv v16, v16, v8, v0.t
2070 ; RV32-NEXT: lui a3, 209715
2071 ; RV32-NEXT: addi a3, a3, 819
2072 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2073 ; RV32-NEXT: vmv.v.x v8, a3
2074 ; RV32-NEXT: csrr a3, vlenb
2075 ; RV32-NEXT: li a4, 40
2076 ; RV32-NEXT: mul a3, a3, a4
2077 ; RV32-NEXT: add a3, sp, a3
2078 ; RV32-NEXT: addi a3, a3, 16
2079 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
2080 ; RV32-NEXT: csrr a3, vlenb
2081 ; RV32-NEXT: li a4, 40
2082 ; RV32-NEXT: mul a3, a3, a4
2083 ; RV32-NEXT: add a3, sp, a3
2084 ; RV32-NEXT: addi a3, a3, 16
2085 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
2086 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2087 ; RV32-NEXT: vand.vv v8, v16, v8, v0.t
2088 ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
2089 ; RV32-NEXT: csrr a3, vlenb
2090 ; RV32-NEXT: li a4, 40
2091 ; RV32-NEXT: mul a3, a3, a4
2092 ; RV32-NEXT: add a3, sp, a3
2093 ; RV32-NEXT: addi a3, a3, 16
2094 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
2095 ; RV32-NEXT: vand.vv v16, v16, v24, v0.t
2096 ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
2097 ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
2098 ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
2099 ; RV32-NEXT: lui a3, 61681
2100 ; RV32-NEXT: addi a3, a3, -241
2101 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2102 ; RV32-NEXT: vmv.v.x v16, a3
2103 ; RV32-NEXT: csrr a3, vlenb
2104 ; RV32-NEXT: slli a3, a3, 4
2105 ; RV32-NEXT: add a3, sp, a3
2106 ; RV32-NEXT: addi a3, a3, 16
2107 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
2108 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2109 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
2110 ; RV32-NEXT: lui a3, 4112
2111 ; RV32-NEXT: addi a3, a3, 257
2112 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2113 ; RV32-NEXT: vmv.v.x v16, a3
2114 ; RV32-NEXT: csrr a3, vlenb
2115 ; RV32-NEXT: slli a3, a3, 3
2116 ; RV32-NEXT: add a3, sp, a3
2117 ; RV32-NEXT: addi a3, a3, 16
2118 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
2119 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2120 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
2121 ; RV32-NEXT: li a2, 56
2122 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t
2123 ; RV32-NEXT: addi a3, sp, 16
2124 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
2125 ; RV32-NEXT: bltu a0, a1, .LBB46_2
2126 ; RV32-NEXT: # %bb.1:
2127 ; RV32-NEXT: mv a0, a1
2128 ; RV32-NEXT: .LBB46_2:
2129 ; RV32-NEXT: vmv1r.v v0, v7
2130 ; RV32-NEXT: li a3, 24
2131 ; RV32-NEXT: mul a1, a1, a3
2132 ; RV32-NEXT: add a1, sp, a1
2133 ; RV32-NEXT: addi a1, a1, 16
2134 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
2135 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2136 ; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
2137 ; RV32-NEXT: csrr a0, vlenb
2138 ; RV32-NEXT: slli a0, a0, 5
2139 ; RV32-NEXT: add a0, sp, a0
2140 ; RV32-NEXT: addi a0, a0, 16
2141 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
2142 ; RV32-NEXT: vand.vv v8, v24, v8, v0.t
2143 ; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
2144 ; RV32-NEXT: csrr a0, vlenb
2145 ; RV32-NEXT: slli a0, a0, 5
2146 ; RV32-NEXT: add a0, sp, a0
2147 ; RV32-NEXT: addi a0, a0, 16
2148 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
2149 ; RV32-NEXT: csrr a0, vlenb
2150 ; RV32-NEXT: li a1, 40
2151 ; RV32-NEXT: mul a0, a0, a1
2152 ; RV32-NEXT: add a0, sp, a0
2153 ; RV32-NEXT: addi a0, a0, 16
2154 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
2155 ; RV32-NEXT: csrr a0, vlenb
2156 ; RV32-NEXT: slli a0, a0, 5
2157 ; RV32-NEXT: add a0, sp, a0
2158 ; RV32-NEXT: addi a0, a0, 16
2159 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
2160 ; RV32-NEXT: vand.vv v16, v16, v8, v0.t
2161 ; RV32-NEXT: csrr a0, vlenb
2162 ; RV32-NEXT: li a1, 24
2163 ; RV32-NEXT: mul a0, a0, a1
2164 ; RV32-NEXT: add a0, sp, a0
2165 ; RV32-NEXT: addi a0, a0, 16
2166 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
2167 ; RV32-NEXT: vmv8r.v v16, v8
2168 ; RV32-NEXT: csrr a0, vlenb
2169 ; RV32-NEXT: slli a0, a0, 5
2170 ; RV32-NEXT: add a0, sp, a0
2171 ; RV32-NEXT: addi a0, a0, 16
2172 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
2173 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
2174 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
2175 ; RV32-NEXT: csrr a0, vlenb
2176 ; RV32-NEXT: li a1, 24
2177 ; RV32-NEXT: mul a0, a0, a1
2178 ; RV32-NEXT: add a0, sp, a0
2179 ; RV32-NEXT: addi a0, a0, 16
2180 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
2181 ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
2182 ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
2183 ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
2184 ; RV32-NEXT: csrr a0, vlenb
2185 ; RV32-NEXT: slli a0, a0, 4
2186 ; RV32-NEXT: add a0, sp, a0
2187 ; RV32-NEXT: addi a0, a0, 16
2188 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
2189 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
2190 ; RV32-NEXT: csrr a0, vlenb
2191 ; RV32-NEXT: slli a0, a0, 3
2192 ; RV32-NEXT: add a0, sp, a0
2193 ; RV32-NEXT: addi a0, a0, 16
2194 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
2195 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
2196 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t
2197 ; RV32-NEXT: addi a0, sp, 16
2198 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
2199 ; RV32-NEXT: csrr a0, vlenb
2200 ; RV32-NEXT: li a1, 48
2201 ; RV32-NEXT: mul a0, a0, a1
2202 ; RV32-NEXT: add sp, sp, a0
2203 ; RV32-NEXT: .cfi_def_cfa sp, 16
2204 ; RV32-NEXT: addi sp, sp, 16
2205 ; RV32-NEXT: .cfi_def_cfa_offset 0
2208 ; RV64-LABEL: vp_ctpop_nxv16i64:
2210 ; RV64-NEXT: addi sp, sp, -16
2211 ; RV64-NEXT: .cfi_def_cfa_offset 16
2212 ; RV64-NEXT: csrr a1, vlenb
2213 ; RV64-NEXT: slli a1, a1, 4
2214 ; RV64-NEXT: sub sp, sp, a1
2215 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
2216 ; RV64-NEXT: csrr a1, vlenb
2217 ; RV64-NEXT: slli a1, a1, 3
2218 ; RV64-NEXT: add a1, sp, a1
2219 ; RV64-NEXT: addi a1, a1, 16
2220 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
2221 ; RV64-NEXT: csrr a1, vlenb
2222 ; RV64-NEXT: srli a2, a1, 3
2223 ; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
2224 ; RV64-NEXT: vslidedown.vx v24, v0, a2
2225 ; RV64-NEXT: mv a2, a0
2226 ; RV64-NEXT: bltu a0, a1, .LBB46_2
2227 ; RV64-NEXT: # %bb.1:
2228 ; RV64-NEXT: mv a2, a1
2229 ; RV64-NEXT: .LBB46_2:
2230 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2231 ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
2232 ; RV64-NEXT: lui a2, 349525
2233 ; RV64-NEXT: lui a3, 209715
2234 ; RV64-NEXT: lui a4, 61681
2235 ; RV64-NEXT: lui a5, 4112
2236 ; RV64-NEXT: addiw a2, a2, 1365
2237 ; RV64-NEXT: addiw a3, a3, 819
2238 ; RV64-NEXT: addiw a4, a4, -241
2239 ; RV64-NEXT: addiw a5, a5, 257
2240 ; RV64-NEXT: slli a6, a2, 32
2241 ; RV64-NEXT: add a6, a2, a6
2242 ; RV64-NEXT: slli a2, a3, 32
2243 ; RV64-NEXT: add a7, a3, a2
2244 ; RV64-NEXT: slli a2, a4, 32
2245 ; RV64-NEXT: add a2, a4, a2
2246 ; RV64-NEXT: slli a3, a5, 32
2247 ; RV64-NEXT: add a3, a5, a3
2248 ; RV64-NEXT: li a4, 56
2249 ; RV64-NEXT: sub a1, a0, a1
2250 ; RV64-NEXT: sltu a0, a0, a1
2251 ; RV64-NEXT: addi a0, a0, -1
2252 ; RV64-NEXT: and a0, a0, a1
2253 ; RV64-NEXT: vand.vx v16, v16, a6, v0.t
2254 ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
2255 ; RV64-NEXT: vand.vx v16, v8, a7, v0.t
2256 ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
2257 ; RV64-NEXT: vand.vx v8, v8, a7, v0.t
2258 ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
2259 ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
2260 ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
2261 ; RV64-NEXT: vand.vx v8, v8, a2, v0.t
2262 ; RV64-NEXT: vmul.vx v8, v8, a3, v0.t
2263 ; RV64-NEXT: vsrl.vx v8, v8, a4, v0.t
2264 ; RV64-NEXT: addi a1, sp, 16
2265 ; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
2266 ; RV64-NEXT: vmv1r.v v0, v24
2267 ; RV64-NEXT: csrr a1, vlenb
2268 ; RV64-NEXT: slli a1, a1, 3
2269 ; RV64-NEXT: add a1, sp, a1
2270 ; RV64-NEXT: addi a1, a1, 16
2271 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
2272 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2273 ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
2274 ; RV64-NEXT: vand.vx v16, v16, a6, v0.t
2275 ; RV64-NEXT: vsub.vv v16, v8, v16, v0.t
2276 ; RV64-NEXT: vand.vx v8, v16, a7, v0.t
2277 ; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t
2278 ; RV64-NEXT: vand.vx v16, v16, a7, v0.t
2279 ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
2280 ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
2281 ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
2282 ; RV64-NEXT: vand.vx v8, v8, a2, v0.t
2283 ; RV64-NEXT: vmul.vx v8, v8, a3, v0.t
2284 ; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t
2285 ; RV64-NEXT: addi a0, sp, 16
2286 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
2287 ; RV64-NEXT: csrr a0, vlenb
2288 ; RV64-NEXT: slli a0, a0, 4
2289 ; RV64-NEXT: add sp, sp, a0
2290 ; RV64-NEXT: .cfi_def_cfa sp, 16
2291 ; RV64-NEXT: addi sp, sp, 16
2292 ; RV64-NEXT: .cfi_def_cfa_offset 0
2295 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i64:
2296 ; CHECK-ZVBB: # %bb.0:
2297 ; CHECK-ZVBB-NEXT: vmv1r.v v24, v0
2298 ; CHECK-ZVBB-NEXT: csrr a1, vlenb
2299 ; CHECK-ZVBB-NEXT: srli a2, a1, 3
2300 ; CHECK-ZVBB-NEXT: sub a3, a0, a1
2301 ; CHECK-ZVBB-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
2302 ; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2
2303 ; CHECK-ZVBB-NEXT: sltu a2, a0, a3
2304 ; CHECK-ZVBB-NEXT: addi a2, a2, -1
2305 ; CHECK-ZVBB-NEXT: and a2, a2, a3
2306 ; CHECK-ZVBB-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2307 ; CHECK-ZVBB-NEXT: vcpop.v v16, v16, v0.t
2308 ; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB46_2
2309 ; CHECK-ZVBB-NEXT: # %bb.1:
2310 ; CHECK-ZVBB-NEXT: mv a0, a1
2311 ; CHECK-ZVBB-NEXT: .LBB46_2:
2312 ; CHECK-ZVBB-NEXT: vmv1r.v v0, v24
2313 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2314 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
2315 ; CHECK-ZVBB-NEXT: ret
2316 %v = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 %evl)
2317 ret <vscale x 16 x i64> %v
; Unmasked VP popcount on <vscale x 16 x i64>: same two-register-group EVL
; split as the masked version, but with no mask to slide down. RV32 still
; spills the splatted 64-bit constants (32*vlenb stack area); RV64 avoids
; all spills by interleaving the two halves, alternating vsetvli between
; the low-half AVL (a1) and high-half AVL (a0). Zvbb needs just two
; unmasked vcpop.v ops.
; NOTE(review): CHECK lines are autogenerated by update_llc_test_checks.py;
; regenerate them rather than hand-editing.
2320 define <vscale x 16 x i64> @vp_ctpop_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) {
2321 ; RV32-LABEL: vp_ctpop_nxv16i64_unmasked:
2323 ; RV32-NEXT: addi sp, sp, -16
2324 ; RV32-NEXT: .cfi_def_cfa_offset 16
2325 ; RV32-NEXT: csrr a1, vlenb
2326 ; RV32-NEXT: slli a1, a1, 5
2327 ; RV32-NEXT: sub sp, sp, a1
2328 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
2329 ; RV32-NEXT: csrr a1, vlenb
2330 ; RV32-NEXT: lui a2, 349525
2331 ; RV32-NEXT: lui a3, 209715
2332 ; RV32-NEXT: sub a4, a0, a1
2333 ; RV32-NEXT: addi a2, a2, 1365
2334 ; RV32-NEXT: addi a3, a3, 819
2335 ; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma
2336 ; RV32-NEXT: vmv.v.x v0, a2
2337 ; RV32-NEXT: sltu a2, a0, a4
2338 ; RV32-NEXT: addi a2, a2, -1
2339 ; RV32-NEXT: and a2, a2, a4
2340 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2341 ; RV32-NEXT: vsrl.vi v24, v16, 1
2342 ; RV32-NEXT: csrr a4, vlenb
2343 ; RV32-NEXT: li a5, 24
2344 ; RV32-NEXT: mul a4, a4, a5
2345 ; RV32-NEXT: add a4, sp, a4
2346 ; RV32-NEXT: addi a4, a4, 16
2347 ; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill
2348 ; RV32-NEXT: vand.vv v24, v24, v0
2349 ; RV32-NEXT: vsub.vv v16, v16, v24
2350 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2351 ; RV32-NEXT: vmv.v.x v0, a3
2352 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2353 ; RV32-NEXT: vand.vv v24, v16, v0
2354 ; RV32-NEXT: vsrl.vi v16, v16, 2
2355 ; RV32-NEXT: csrr a3, vlenb
2356 ; RV32-NEXT: slli a3, a3, 4
2357 ; RV32-NEXT: add a3, sp, a3
2358 ; RV32-NEXT: addi a3, a3, 16
2359 ; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill
2360 ; RV32-NEXT: vand.vv v16, v16, v0
2361 ; RV32-NEXT: vadd.vv v16, v24, v16
2362 ; RV32-NEXT: vsrl.vi v24, v16, 4
2363 ; RV32-NEXT: vadd.vv v16, v16, v24
2364 ; RV32-NEXT: lui a3, 61681
2365 ; RV32-NEXT: lui a4, 4112
2366 ; RV32-NEXT: addi a3, a3, -241
2367 ; RV32-NEXT: addi a4, a4, 257
2368 ; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma
2369 ; RV32-NEXT: vmv.v.x v24, a3
2370 ; RV32-NEXT: csrr a3, vlenb
2371 ; RV32-NEXT: slli a3, a3, 3
2372 ; RV32-NEXT: add a3, sp, a3
2373 ; RV32-NEXT: addi a3, a3, 16
2374 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
2375 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2376 ; RV32-NEXT: vand.vv v16, v16, v24
2377 ; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma
2378 ; RV32-NEXT: vmv.v.x v24, a4
2379 ; RV32-NEXT: addi a3, sp, 16
2380 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
2381 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2382 ; RV32-NEXT: vmul.vv v16, v16, v24
2383 ; RV32-NEXT: li a2, 56
2384 ; RV32-NEXT: vsrl.vx v16, v16, a2
2385 ; RV32-NEXT: bltu a0, a1, .LBB47_2
2386 ; RV32-NEXT: # %bb.1:
2387 ; RV32-NEXT: mv a0, a1
2388 ; RV32-NEXT: .LBB47_2:
2389 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2390 ; RV32-NEXT: vsrl.vi v24, v8, 1
2391 ; RV32-NEXT: csrr a0, vlenb
2392 ; RV32-NEXT: li a1, 24
2393 ; RV32-NEXT: mul a0, a0, a1
2394 ; RV32-NEXT: add a0, sp, a0
2395 ; RV32-NEXT: addi a0, a0, 16
2396 ; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
2397 ; RV32-NEXT: vand.vv v24, v24, v0
2398 ; RV32-NEXT: vsub.vv v8, v8, v24
2399 ; RV32-NEXT: csrr a0, vlenb
2400 ; RV32-NEXT: slli a0, a0, 4
2401 ; RV32-NEXT: add a0, sp, a0
2402 ; RV32-NEXT: addi a0, a0, 16
2403 ; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
2404 ; RV32-NEXT: vand.vv v24, v8, v0
2405 ; RV32-NEXT: vsrl.vi v8, v8, 2
2406 ; RV32-NEXT: vand.vv v8, v8, v0
2407 ; RV32-NEXT: vadd.vv v8, v24, v8
2408 ; RV32-NEXT: vsrl.vi v24, v8, 4
2409 ; RV32-NEXT: vadd.vv v8, v8, v24
2410 ; RV32-NEXT: csrr a0, vlenb
2411 ; RV32-NEXT: slli a0, a0, 3
2412 ; RV32-NEXT: add a0, sp, a0
2413 ; RV32-NEXT: addi a0, a0, 16
2414 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
2415 ; RV32-NEXT: vand.vv v8, v8, v24
2416 ; RV32-NEXT: addi a0, sp, 16
2417 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
2418 ; RV32-NEXT: vmul.vv v8, v8, v24
2419 ; RV32-NEXT: vsrl.vx v8, v8, a2
2420 ; RV32-NEXT: csrr a0, vlenb
2421 ; RV32-NEXT: slli a0, a0, 5
2422 ; RV32-NEXT: add sp, sp, a0
2423 ; RV32-NEXT: .cfi_def_cfa sp, 16
2424 ; RV32-NEXT: addi sp, sp, 16
2425 ; RV32-NEXT: .cfi_def_cfa_offset 0
2428 ; RV64-LABEL: vp_ctpop_nxv16i64_unmasked:
2430 ; RV64-NEXT: csrr a2, vlenb
2431 ; RV64-NEXT: mv a1, a0
2432 ; RV64-NEXT: bltu a0, a2, .LBB47_2
2433 ; RV64-NEXT: # %bb.1:
2434 ; RV64-NEXT: mv a1, a2
2435 ; RV64-NEXT: .LBB47_2:
2436 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2437 ; RV64-NEXT: vsrl.vi v24, v8, 1
2438 ; RV64-NEXT: lui a3, 349525
2439 ; RV64-NEXT: lui a4, 209715
2440 ; RV64-NEXT: lui a5, 61681
2441 ; RV64-NEXT: lui a6, 4112
2442 ; RV64-NEXT: addiw a3, a3, 1365
2443 ; RV64-NEXT: addiw a4, a4, 819
2444 ; RV64-NEXT: addiw a5, a5, -241
2445 ; RV64-NEXT: addiw a6, a6, 257
2446 ; RV64-NEXT: slli a7, a3, 32
2447 ; RV64-NEXT: add a3, a3, a7
2448 ; RV64-NEXT: slli a7, a4, 32
2449 ; RV64-NEXT: add a4, a4, a7
2450 ; RV64-NEXT: slli a7, a5, 32
2451 ; RV64-NEXT: add a5, a5, a7
2452 ; RV64-NEXT: slli a7, a6, 32
2453 ; RV64-NEXT: add a6, a6, a7
2454 ; RV64-NEXT: li a7, 56
2455 ; RV64-NEXT: sub a2, a0, a2
2456 ; RV64-NEXT: sltu a0, a0, a2
2457 ; RV64-NEXT: addi a0, a0, -1
2458 ; RV64-NEXT: and a0, a0, a2
2459 ; RV64-NEXT: vand.vx v24, v24, a3
2460 ; RV64-NEXT: vsub.vv v8, v8, v24
2461 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2462 ; RV64-NEXT: vsrl.vi v24, v16, 1
2463 ; RV64-NEXT: vand.vx v24, v24, a3
2464 ; RV64-NEXT: vsub.vv v16, v16, v24
2465 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2466 ; RV64-NEXT: vand.vx v24, v8, a4
2467 ; RV64-NEXT: vsrl.vi v8, v8, 2
2468 ; RV64-NEXT: vand.vx v8, v8, a4
2469 ; RV64-NEXT: vadd.vv v8, v24, v8
2470 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2471 ; RV64-NEXT: vand.vx v24, v16, a4
2472 ; RV64-NEXT: vsrl.vi v16, v16, 2
2473 ; RV64-NEXT: vand.vx v16, v16, a4
2474 ; RV64-NEXT: vadd.vv v16, v24, v16
2475 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2476 ; RV64-NEXT: vsrl.vi v24, v8, 4
2477 ; RV64-NEXT: vadd.vv v8, v8, v24
2478 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2479 ; RV64-NEXT: vsrl.vi v24, v16, 4
2480 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2481 ; RV64-NEXT: vand.vx v8, v8, a5
2482 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2483 ; RV64-NEXT: vadd.vv v16, v16, v24
2484 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2485 ; RV64-NEXT: vmul.vx v8, v8, a6
2486 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2487 ; RV64-NEXT: vand.vx v16, v16, a5
2488 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2489 ; RV64-NEXT: vsrl.vx v8, v8, a7
2490 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2491 ; RV64-NEXT: vmul.vx v16, v16, a6
2492 ; RV64-NEXT: vsrl.vx v16, v16, a7
2495 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i64_unmasked:
2496 ; CHECK-ZVBB: # %bb.0:
2497 ; CHECK-ZVBB-NEXT: csrr a1, vlenb
2498 ; CHECK-ZVBB-NEXT: sub a2, a0, a1
2499 ; CHECK-ZVBB-NEXT: sltu a3, a0, a2
2500 ; CHECK-ZVBB-NEXT: addi a3, a3, -1
2501 ; CHECK-ZVBB-NEXT: and a2, a3, a2
2502 ; CHECK-ZVBB-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2503 ; CHECK-ZVBB-NEXT: vcpop.v v16, v16
2504 ; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB47_2
2505 ; CHECK-ZVBB-NEXT: # %bb.1:
2506 ; CHECK-ZVBB-NEXT: mv a0, a1
2507 ; CHECK-ZVBB-NEXT: .LBB47_2:
2508 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2509 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8
2510 ; CHECK-ZVBB-NEXT: ret
2511 %v = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
2512 ret <vscale x 16 x i64> %v
2516 declare <vscale x 1 x i9> @llvm.vp.ctpop.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i1>, i32)
2518 define <vscale x 1 x i9> @vp_ctpop_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
2519 ; CHECK-LABEL: vp_ctpop_nxv1i9:
2521 ; CHECK-NEXT: li a1, 511
2522 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
2523 ; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
2524 ; CHECK-NEXT: lui a0, 5
2525 ; CHECK-NEXT: addi a0, a0, 1365
2526 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
2527 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
2528 ; CHECK-NEXT: lui a0, 3
2529 ; CHECK-NEXT: addi a0, a0, 819
2530 ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
2531 ; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
2532 ; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
2533 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
2534 ; CHECK-NEXT: lui a0, 1
2535 ; CHECK-NEXT: addi a0, a0, -241
2536 ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
2537 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
2538 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
2539 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
2540 ; CHECK-NEXT: li a0, 257
2541 ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
2542 ; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
2545 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i9:
2546 ; CHECK-ZVBB: # %bb.0:
2547 ; CHECK-ZVBB-NEXT: li a1, 511
2548 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
2549 ; CHECK-ZVBB-NEXT: vand.vx v8, v8, a1, v0.t
2550 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
2551 ; CHECK-ZVBB-NEXT: ret
2552 %v = call <vscale x 1 x i9> @llvm.vp.ctpop.nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 %evl)
2553 ret <vscale x 1 x i9> %v