; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
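
; Without +zvbb, ctpop is expanded to the classic SWAR bit-counting sequence:
; count bits in 2-bit fields with the 0x55.. and 0x33.. masks, fold nibbles
; with 0x0f.., and for elements wider than a byte sum the per-byte counts via
; a multiply by 0x0101.. and a final shift. With +zvbb the whole operation
; lowers to a single vcpop.v.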
define void @ctpop_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a1, 85
; CHECK-NEXT:    vand.vx v9, v9, a1
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    li a1, 51
; CHECK-NEXT:    vand.vx v9, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v16i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
  store <16 x i8> %c, ptr %x
  ret void
}
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)

define void @ctpop_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a1, 5
; CHECK-NEXT:    addi a1, a1, 1365
; CHECK-NEXT:    vand.vx v9, v9, a1
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    lui a1, 3
; CHECK-NEXT:    addi a1, a1, 819
; CHECK-NEXT:    vand.vx v9, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    lui a1, 1
; CHECK-NEXT:    addi a1, a1, -241
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    li a1, 257
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
  store <8 x i16> %c, ptr %x
  ret void
}
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)

define void @ctpop_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a1, 349525
; CHECK-NEXT:    addi a1, a1, 1365
; CHECK-NEXT:    vand.vx v9, v9, a1
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    lui a1, 209715
; CHECK-NEXT:    addi a1, a1, 819
; CHECK-NEXT:    vand.vx v9, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    lui a1, 61681
; CHECK-NEXT:    addi a1, a1, -241
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    lui a1, 4112
; CHECK-NEXT:    addi a1, a1, 257
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
  store <4 x i32> %c, ptr %x
  ret void
}
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
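
; For e64 elements on RV32 there is no 64-bit scalar register to splat the
; SWAR masks from, so each constant is broadcast at e32 with vmv.v.x (the same
; 32-bit pattern fills both halves of every 64-bit element) before vtype is
; switched back to e64 for the arithmetic.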
define void @ctpop_v2i64(ptr %x, ptr %y) {
; LMULMAX2-RV32-LABEL: ctpop_v2i64:
; LMULMAX2-RV32:       # %bb.0:
; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX2-RV32-NEXT:    lui a1, 349525
; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v9, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX2-RV32-NEXT:    vand.vv v9, v10, v9
; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v9
; LMULMAX2-RV32-NEXT:    lui a1, 209715
; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v9, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vand.vv v10, v8, v9
; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v9
; LMULMAX2-RV32-NEXT:    vadd.vv v8, v10, v8
; LMULMAX2-RV32-NEXT:    vsrl.vi v9, v8, 4
; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v9
; LMULMAX2-RV32-NEXT:    lui a1, 61681
; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v9, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v9
; LMULMAX2-RV32-NEXT:    lui a1, 4112
; LMULMAX2-RV32-NEXT:    addi a1, a1, 257
; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v9, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vmul.vv v8, v8, v9
; LMULMAX2-RV32-NEXT:    li a1, 56
; LMULMAX2-RV32-NEXT:    vsrl.vx v8, v8, a1
; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV32-NEXT:    ret
;
; LMULMAX2-RV64-LABEL: ctpop_v2i64:
; LMULMAX2-RV64:       # %bb.0:
; LMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 1
; LMULMAX2-RV64-NEXT:    lui a1, 349525
; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
; LMULMAX2-RV64-NEXT:    add a1, a1, a2
; LMULMAX2-RV64-NEXT:    vand.vx v9, v9, a1
; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v9
; LMULMAX2-RV64-NEXT:    lui a1, 209715
; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
; LMULMAX2-RV64-NEXT:    add a1, a1, a2
; LMULMAX2-RV64-NEXT:    vand.vx v9, v8, a1
; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-RV64-NEXT:    vadd.vv v8, v9, v8
; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 4
; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v9
; LMULMAX2-RV64-NEXT:    lui a1, 61681
; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
; LMULMAX2-RV64-NEXT:    add a1, a1, a2
; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-RV64-NEXT:    lui a1, 4112
; LMULMAX2-RV64-NEXT:    addiw a1, a1, 257
; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
; LMULMAX2-RV64-NEXT:    add a1, a1, a2
; LMULMAX2-RV64-NEXT:    vmul.vx v8, v8, a1
; LMULMAX2-RV64-NEXT:    li a1, 56
; LMULMAX2-RV64-NEXT:    vsrl.vx v8, v8, a1
; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV64-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: ctpop_v2i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    lui a1, 349525
; LMULMAX1-RV32-NEXT:    addi a1, a1, 1365
; LMULMAX1-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a1
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX1-RV32-NEXT:    vand.vv v9, v10, v9
; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v9
; LMULMAX1-RV32-NEXT:    lui a1, 209715
; LMULMAX1-RV32-NEXT:    addi a1, a1, 819
; LMULMAX1-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a1
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vand.vv v10, v8, v9
; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v9
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v8
; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 4
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v9
; LMULMAX1-RV32-NEXT:    lui a1, 61681
; LMULMAX1-RV32-NEXT:    addi a1, a1, -241
; LMULMAX1-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a1
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v9
; LMULMAX1-RV32-NEXT:    lui a1, 4112
; LMULMAX1-RV32-NEXT:    addi a1, a1, 257
; LMULMAX1-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a1
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v9
; LMULMAX1-RV32-NEXT:    li a1, 56
; LMULMAX1-RV32-NEXT:    vsrl.vx v8, v8, a1
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: ctpop_v2i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 1
; LMULMAX1-RV64-NEXT:    lui a1, 349525
; LMULMAX1-RV64-NEXT:    addiw a1, a1, 1365
; LMULMAX1-RV64-NEXT:    slli a2, a1, 32
; LMULMAX1-RV64-NEXT:    add a1, a1, a2
; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a1
; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v9
; LMULMAX1-RV64-NEXT:    lui a1, 209715
; LMULMAX1-RV64-NEXT:    addiw a1, a1, 819
; LMULMAX1-RV64-NEXT:    slli a2, a1, 32
; LMULMAX1-RV64-NEXT:    add a1, a1, a2
; LMULMAX1-RV64-NEXT:    vand.vx v9, v8, a1
; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v9, v8
; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 4
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v9
; LMULMAX1-RV64-NEXT:    lui a1, 61681
; LMULMAX1-RV64-NEXT:    addiw a1, a1, -241
; LMULMAX1-RV64-NEXT:    slli a2, a1, 32
; LMULMAX1-RV64-NEXT:    add a1, a1, a2
; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
; LMULMAX1-RV64-NEXT:    lui a1, 4112
; LMULMAX1-RV64-NEXT:    addiw a1, a1, 257
; LMULMAX1-RV64-NEXT:    slli a2, a1, 32
; LMULMAX1-RV64-NEXT:    add a1, a1, a2
; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a1
; LMULMAX1-RV64-NEXT:    li a1, 56
; LMULMAX1-RV64-NEXT:    vsrl.vx v8, v8, a1
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v2i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  store <2 x i64> %c, ptr %x
  ret void
}
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
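
; The 256-bit tests below fit in a single m2 register group when the LMUL cap
; is 2; with -riscv-v-fixed-length-vector-lmul-max=1 they are instead split
; into two 128-bit halves that reuse the same scalar mask constants.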
define void @ctpop_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a1, 32
; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX2-NEXT:    li a1, 85
; LMULMAX2-NEXT:    vand.vx v10, v10, a1
; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-NEXT:    li a1, 51
; LMULMAX2-NEXT:    vand.vx v10, v8, a1
; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX2-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    vand.vi v8, v8, 15
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ctpop_v32i8:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vle8.v v8, (a1)
; LMULMAX1-NEXT:    vle8.v v9, (a0)
; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX1-NEXT:    li a2, 85
; LMULMAX1-NEXT:    vand.vx v10, v10, a2
; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
; LMULMAX1-NEXT:    li a3, 51
; LMULMAX1-NEXT:    vand.vx v10, v8, a3
; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX1-NEXT:    vand.vx v8, v8, a3
; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
; LMULMAX1-NEXT:    vand.vi v8, v8, 15
; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
; LMULMAX1-NEXT:    vand.vx v10, v10, a2
; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
; LMULMAX1-NEXT:    vand.vx v10, v9, a3
; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
; LMULMAX1-NEXT:    vand.vx v9, v9, a3
; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
; LMULMAX1-NEXT:    vand.vi v9, v9, 15
; LMULMAX1-NEXT:    vse8.v v9, (a0)
; LMULMAX1-NEXT:    vse8.v v8, (a1)
; LMULMAX1-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v32i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a1, 32
; ZVBB-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
  store <32 x i8> %c, ptr %x
  ret void
}
declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)

define void @ctpop_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX2-NEXT:    lui a1, 5
; LMULMAX2-NEXT:    addi a1, a1, 1365
; LMULMAX2-NEXT:    vand.vx v10, v10, a1
; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-NEXT:    lui a1, 3
; LMULMAX2-NEXT:    addi a1, a1, 819
; LMULMAX2-NEXT:    vand.vx v10, v8, a1
; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX2-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    lui a1, 1
; LMULMAX2-NEXT:    addi a1, a1, -241
; LMULMAX2-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-NEXT:    li a1, 257
; LMULMAX2-NEXT:    vmul.vx v8, v8, a1
; LMULMAX2-NEXT:    vsrl.vi v8, v8, 8
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ctpop_v16i16:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vle16.v v8, (a1)
; LMULMAX1-NEXT:    vle16.v v9, (a0)
; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX1-NEXT:    lui a2, 5
; LMULMAX1-NEXT:    addi a2, a2, 1365
; LMULMAX1-NEXT:    vand.vx v10, v10, a2
; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
; LMULMAX1-NEXT:    lui a3, 3
; LMULMAX1-NEXT:    addi a3, a3, 819
; LMULMAX1-NEXT:    vand.vx v10, v8, a3
; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX1-NEXT:    vand.vx v8, v8, a3
; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
; LMULMAX1-NEXT:    lui a4, 1
; LMULMAX1-NEXT:    addi a4, a4, -241
; LMULMAX1-NEXT:    vand.vx v8, v8, a4
; LMULMAX1-NEXT:    li a5, 257
; LMULMAX1-NEXT:    vmul.vx v8, v8, a5
; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
; LMULMAX1-NEXT:    vand.vx v10, v10, a2
; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
; LMULMAX1-NEXT:    vand.vx v10, v9, a3
; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
; LMULMAX1-NEXT:    vand.vx v9, v9, a3
; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
; LMULMAX1-NEXT:    vand.vx v9, v9, a4
; LMULMAX1-NEXT:    vmul.vx v9, v9, a5
; LMULMAX1-NEXT:    vsrl.vi v9, v9, 8
; LMULMAX1-NEXT:    vse16.v v9, (a0)
; LMULMAX1-NEXT:    vse16.v v8, (a1)
; LMULMAX1-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v16i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
  store <16 x i16> %c, ptr %x
  ret void
}
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)

define void @ctpop_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX2-NEXT:    lui a1, 349525
; LMULMAX2-NEXT:    addi a1, a1, 1365
; LMULMAX2-NEXT:    vand.vx v10, v10, a1
; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-NEXT:    lui a1, 209715
; LMULMAX2-NEXT:    addi a1, a1, 819
; LMULMAX2-NEXT:    vand.vx v10, v8, a1
; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX2-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    lui a1, 61681
; LMULMAX2-NEXT:    addi a1, a1, -241
; LMULMAX2-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-NEXT:    lui a1, 4112
; LMULMAX2-NEXT:    addi a1, a1, 257
; LMULMAX2-NEXT:    vmul.vx v8, v8, a1
; LMULMAX2-NEXT:    vsrl.vi v8, v8, 24
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ctpop_v8i32:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vle32.v v8, (a1)
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX1-NEXT:    lui a2, 349525
; LMULMAX1-NEXT:    addi a2, a2, 1365
; LMULMAX1-NEXT:    vand.vx v10, v10, a2
; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
; LMULMAX1-NEXT:    lui a3, 209715
; LMULMAX1-NEXT:    addi a3, a3, 819
; LMULMAX1-NEXT:    vand.vx v10, v8, a3
; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX1-NEXT:    vand.vx v8, v8, a3
; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
; LMULMAX1-NEXT:    lui a4, 61681
; LMULMAX1-NEXT:    addi a4, a4, -241
; LMULMAX1-NEXT:    vand.vx v8, v8, a4
; LMULMAX1-NEXT:    lui a5, 4112
; LMULMAX1-NEXT:    addi a5, a5, 257
; LMULMAX1-NEXT:    vmul.vx v8, v8, a5
; LMULMAX1-NEXT:    vsrl.vi v8, v8, 24
; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
; LMULMAX1-NEXT:    vand.vx v10, v10, a2
; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
; LMULMAX1-NEXT:    vand.vx v10, v9, a3
; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
; LMULMAX1-NEXT:    vand.vx v9, v9, a3
; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
; LMULMAX1-NEXT:    vand.vx v9, v9, a4
; LMULMAX1-NEXT:    vmul.vx v9, v9, a5
; LMULMAX1-NEXT:    vsrl.vi v9, v9, 24
; LMULMAX1-NEXT:    vse32.v v9, (a0)
; LMULMAX1-NEXT:    vse32.v v8, (a1)
; LMULMAX1-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
  store <8 x i32> %c, ptr %x
  ret void
}
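; The ult-two and ugt-one tests below need no full population count: clearing
; the lowest set bit with x & (x - 1) yields zero iff at most one bit is set,
; so a single vadd.vi/vand.vv/compare sequence suffices.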
define <8 x i1> @ctpop_v8i32_ult_two(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v8i32_ult_two:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
; LMULMAX2-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-NEXT:    vmseq.vi v0, v8, 0
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ctpop_v8i32_ult_two:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vle32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    vadd.vi v10, v8, -1
; LMULMAX1-NEXT:    vand.vv v8, v8, v10
; LMULMAX1-NEXT:    vmseq.vi v0, v8, 0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v8, 0
; LMULMAX1-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vadd.vi v10, v9, -1
; LMULMAX1-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-NEXT:    vmseq.vi v0, v9, 0
; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v9, 0
; LMULMAX1-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i32_ult_two:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsleu.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
  %cmp = icmp ult <8 x i32> %c, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i1> %cmp
}
define <8 x i1> @ctpop_v8i32_ugt_one(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v8i32_ugt_one:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
; LMULMAX2-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ctpop_v8i32_ugt_one:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vle32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    vadd.vi v10, v8, -1
; LMULMAX1-NEXT:    vand.vv v8, v8, v10
; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v8, 0
; LMULMAX1-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vadd.vi v10, v9, -1
; LMULMAX1-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-NEXT:    vmsne.vi v0, v9, 0
; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v9, 0
; LMULMAX1-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i32_ugt_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsgtu.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
  %cmp = icmp ugt <8 x i32> %c, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i1> %cmp
}
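; ctpop(x) == 1 is tested as (x ^ (x - 1)) >u (x - 1): for a power of two the
; xor sets all bits up to and including the single set bit, which exceeds
; x - 1; for x == 0 or for multiple set bits it does not. The ne-one test is
; the same comparison with vmsleu.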
define <8 x i1> @ctpop_v8i32_eq_one(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v8i32_eq_one:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
; LMULMAX2-NEXT:    vmsltu.vv v0, v10, v8
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ctpop_v8i32_eq_one:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vle32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    vadd.vi v10, v8, -1
; LMULMAX1-NEXT:    vxor.vv v8, v8, v10
; LMULMAX1-NEXT:    vmsltu.vv v0, v10, v8
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v8, 0
; LMULMAX1-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vadd.vi v10, v9, -1
; LMULMAX1-NEXT:    vxor.vv v9, v9, v10
; LMULMAX1-NEXT:    vmsltu.vv v0, v10, v9
; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v9, 0
; LMULMAX1-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i32_eq_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmseq.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
  %cmp = icmp eq <8 x i32> %c, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i1> %cmp
}
define <8 x i1> @ctpop_v8i32_ne_one(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v8i32_ne_one:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
; LMULMAX2-NEXT:    vmsleu.vv v0, v8, v10
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ctpop_v8i32_ne_one:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vle32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    vadd.vi v10, v8, -1
; LMULMAX1-NEXT:    vxor.vv v8, v8, v10
; LMULMAX1-NEXT:    vmsleu.vv v0, v8, v10
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v8, 0
; LMULMAX1-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vadd.vi v10, v9, -1
; LMULMAX1-NEXT:    vxor.vv v9, v9, v10
; LMULMAX1-NEXT:    vmsleu.vv v0, v9, v10
; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v9, 0
; LMULMAX1-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i32_ne_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsne.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
  %cmp = icmp ne <8 x i32> %c, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i1> %cmp
}
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
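
; The v4i64 variants repeat the same patterns at e64: LMULMAX2 uses an m2
; register group, LMULMAX1 splits into two m1 halves, and on RV32 the -1 and 0
; comparison operands are materialized with vmv.v.i at e32.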
define void @ctpop_v4i64(ptr %x, ptr %y) {
; LMULMAX2-RV32-LABEL: ctpop_v4i64:
; LMULMAX2-RV32:       # %bb.0:
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX2-RV32-NEXT:    lui a1, 349525
; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vsrl.vi v12, v8, 1
; LMULMAX2-RV32-NEXT:    vand.vv v10, v12, v10
; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-RV32-NEXT:    lui a1, 209715
; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vand.vv v12, v8, v10
; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-RV32-NEXT:    vadd.vv v8, v12, v8
; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-RV32-NEXT:    lui a1, 61681
; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-RV32-NEXT:    lui a1, 4112
; LMULMAX2-RV32-NEXT:    addi a1, a1, 257
; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vmul.vv v8, v8, v10
; LMULMAX2-RV32-NEXT:    li a1, 56
; LMULMAX2-RV32-NEXT:    vsrl.vx v8, v8, a1
; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV32-NEXT:    ret
;
; LMULMAX2-RV64-LABEL: ctpop_v4i64:
; LMULMAX2-RV64:       # %bb.0:
; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX2-RV64-NEXT:    lui a1, 349525
; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
; LMULMAX2-RV64-NEXT:    add a1, a1, a2
; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-RV64-NEXT:    lui a1, 209715
; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
; LMULMAX2-RV64-NEXT:    add a1, a1, a2
; LMULMAX2-RV64-NEXT:    vand.vx v10, v8, a1
; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-RV64-NEXT:    vadd.vv v8, v10, v8
; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-RV64-NEXT:    lui a1, 61681
; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
; LMULMAX2-RV64-NEXT:    add a1, a1, a2
; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-RV64-NEXT:    lui a1, 4112
; LMULMAX2-RV64-NEXT:    addiw a1, a1, 257
; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
; LMULMAX2-RV64-NEXT:    add a1, a1, a2
; LMULMAX2-RV64-NEXT:    vmul.vx v8, v8, a1
; LMULMAX2-RV64-NEXT:    li a1, 56
; LMULMAX2-RV64-NEXT:    vsrl.vx v8, v8, a1
; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV64-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: ctpop_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a1)
; LMULMAX1-RV32-NEXT:    lui a2, 349525
; LMULMAX1-RV32-NEXT:    addi a2, a2, 1365
; LMULMAX1-RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.x v10, a2
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v9, 1
; LMULMAX1-RV32-NEXT:    vand.vv v11, v11, v10
; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v11
; LMULMAX1-RV32-NEXT:    lui a2, 209715
; LMULMAX1-RV32-NEXT:    addi a2, a2, 819
; LMULMAX1-RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.x v11, a2
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vand.vv v12, v9, v11
; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 2
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v11
; LMULMAX1-RV32-NEXT:    vadd.vv v9, v12, v9
; LMULMAX1-RV32-NEXT:    vsrl.vi v12, v9, 4
; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v12
; LMULMAX1-RV32-NEXT:    lui a2, 61681
; LMULMAX1-RV32-NEXT:    addi a2, a2, -241
; LMULMAX1-RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.x v12, a2
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v12
; LMULMAX1-RV32-NEXT:    lui a2, 4112
; LMULMAX1-RV32-NEXT:    addi a2, a2, 257
; LMULMAX1-RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.x v13, a2
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmul.vv v9, v9, v13
; LMULMAX1-RV32-NEXT:    li a2, 56
; LMULMAX1-RV32-NEXT:    vsrl.vx v9, v9, a2
; LMULMAX1-RV32-NEXT:    vsrl.vi v14, v8, 1
; LMULMAX1-RV32-NEXT:    vand.vv v10, v14, v10
; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v10
; LMULMAX1-RV32-NEXT:    vand.vv v10, v8, v11
; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v8
; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v10
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v12
; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v13
; LMULMAX1-RV32-NEXT:    vsrl.vx v8, v8, a2
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a1)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: ctpop_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a1)
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX1-RV64-NEXT:    lui a2, 349525
; LMULMAX1-RV64-NEXT:    addiw a2, a2, 1365
; LMULMAX1-RV64-NEXT:    slli a3, a2, 32
; LMULMAX1-RV64-NEXT:    add a2, a2, a3
; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v10
; LMULMAX1-RV64-NEXT:    lui a3, 209715
; LMULMAX1-RV64-NEXT:    addiw a3, a3, 819
; LMULMAX1-RV64-NEXT:    slli a4, a3, 32
; LMULMAX1-RV64-NEXT:    add a3, a3, a4
; LMULMAX1-RV64-NEXT:    vand.vx v10, v8, a3
; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a3
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v10, v8
; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v10
; LMULMAX1-RV64-NEXT:    lui a4, 61681
; LMULMAX1-RV64-NEXT:    addiw a4, a4, -241
; LMULMAX1-RV64-NEXT:    slli a5, a4, 32
; LMULMAX1-RV64-NEXT:    add a4, a4, a5
; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a4
; LMULMAX1-RV64-NEXT:    lui a5, 4112
; LMULMAX1-RV64-NEXT:    addiw a5, a5, 257
; LMULMAX1-RV64-NEXT:    slli a6, a5, 32
; LMULMAX1-RV64-NEXT:    add a5, a5, a6
; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a5
; LMULMAX1-RV64-NEXT:    li a6, 56
; LMULMAX1-RV64-NEXT:    vsrl.vx v8, v8, a6
; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
; LMULMAX1-RV64-NEXT:    vsub.vv v9, v9, v10
; LMULMAX1-RV64-NEXT:    vand.vx v10, v9, a3
; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 2
; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a3
; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v10
; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a4
; LMULMAX1-RV64-NEXT:    vmul.vx v9, v9, a5
; LMULMAX1-RV64-NEXT:    vsrl.vx v9, v9, a6
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a1)
; LMULMAX1-RV64-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
  store <4 x i64> %c, ptr %x
  ret void
}
define <4 x i1> @ctpop_v4i64_ult_two(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v4i64_ult_two:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
; LMULMAX2-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-NEXT:    vmseq.vi v0, v8, 0
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: ctpop_v4i64_ult_two:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a1)
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v10, -1
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v11, v9, v10
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v11
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v11, 0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmseq.vv v0, v9, v11
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v9, 0
; LMULMAX1-RV32-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v10, v8, v10
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v10
; LMULMAX1-RV32-NEXT:    vmseq.vv v0, v8, v11
; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vslideup.vi v9, v8, 2
; LMULMAX1-RV32-NEXT:    vmsne.vi v0, v9, 0
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: ctpop_v4i64_ult_two:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a0, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV64-NEXT:    vadd.vi v10, v8, -1
; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v10
; LMULMAX1-RV64-NEXT:    vmseq.vi v0, v8, 0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vadd.vi v10, v9, -1
; LMULMAX1-RV64-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-RV64-NEXT:    vmseq.vi v0, v9, 0
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV64-NEXT:    vmv.v.i v9, 0
; LMULMAX1-RV64-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vslideup.vi v8, v9, 2
; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-RV64-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i64_ult_two:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsleu.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
  %cmp = icmp ult <4 x i64> %c, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i1> %cmp
}
define <4 x i1> @ctpop_v4i64_ugt_one(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v4i64_ugt_one:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
; LMULMAX2-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: ctpop_v4i64_ugt_one:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a1)
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v10, -1
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v11, v9, v10
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v11
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v11, 0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmsne.vv v0, v9, v11
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v9, 0
; LMULMAX1-RV32-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v10, v8, v10
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v10
; LMULMAX1-RV32-NEXT:    vmsne.vv v0, v8, v11
; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vslideup.vi v9, v8, 2
; LMULMAX1-RV32-NEXT:    vmsne.vi v0, v9, 0
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: ctpop_v4i64_ugt_one:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a0, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV64-NEXT:    vadd.vi v10, v8, -1
; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v10
; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vadd.vi v10, v9, -1
; LMULMAX1-RV64-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v9, 0
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV64-NEXT:    vmv.v.i v9, 0
; LMULMAX1-RV64-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vslideup.vi v8, v9, 2
; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-RV64-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i64_ugt_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsgtu.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
  %cmp = icmp ugt <4 x i64> %c, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i1> %cmp
}
define <4 x i1> @ctpop_v4i64_eq_one(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v4i64_eq_one:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
; LMULMAX2-NEXT:    vmsltu.vv v0, v10, v8
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: ctpop_v4i64_eq_one:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a1)
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v10, -1
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v11, v9, v10
; LMULMAX1-RV32-NEXT:    vxor.vv v9, v9, v11
; LMULMAX1-RV32-NEXT:    vmsltu.vv v0, v11, v9
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v9, 0
; LMULMAX1-RV32-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v10, v8, v10
; LMULMAX1-RV32-NEXT:    vxor.vv v8, v8, v10
; LMULMAX1-RV32-NEXT:    vmsltu.vv v0, v10, v8
; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vslideup.vi v9, v8, 2
; LMULMAX1-RV32-NEXT:    vmsne.vi v0, v9, 0
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: ctpop_v4i64_eq_one:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a0, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV64-NEXT:    vadd.vi v10, v8, -1
; LMULMAX1-RV64-NEXT:    vxor.vv v8, v8, v10
; LMULMAX1-RV64-NEXT:    vmsltu.vv v0, v10, v8
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vadd.vi v10, v9, -1
; LMULMAX1-RV64-NEXT:    vxor.vv v9, v9, v10
; LMULMAX1-RV64-NEXT:    vmsltu.vv v0, v10, v9
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV64-NEXT:    vmv.v.i v9, 0
; LMULMAX1-RV64-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vslideup.vi v8, v9, 2
; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-RV64-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i64_eq_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmseq.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
  %cmp = icmp eq <4 x i64> %c, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i1> %cmp
}
define <4 x i1> @ctpop_v4i64_ne_one(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v4i64_ne_one:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
; LMULMAX2-NEXT:    vmsleu.vv v0, v8, v10
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: ctpop_v4i64_ne_one:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a1)
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v10, -1
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v11, v9, v10
; LMULMAX1-RV32-NEXT:    vxor.vv v9, v9, v11
; LMULMAX1-RV32-NEXT:    vmsleu.vv v0, v9, v11
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v9, 0
; LMULMAX1-RV32-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v10, v8, v10
; LMULMAX1-RV32-NEXT:    vxor.vv v8, v8, v10
; LMULMAX1-RV32-NEXT:    vmsleu.vv v0, v8, v10
; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vslideup.vi v9, v8, 2
; LMULMAX1-RV32-NEXT:    vmsne.vi v0, v9, 0
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: ctpop_v4i64_ne_one:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a0, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV64-NEXT:    vadd.vi v10, v8, -1
; LMULMAX1-RV64-NEXT:    vxor.vv v8, v8, v10
; LMULMAX1-RV64-NEXT:    vmsleu.vv v0, v8, v10
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vadd.vi v10, v9, -1
; LMULMAX1-RV64-NEXT:    vxor.vv v9, v9, v10
; LMULMAX1-RV64-NEXT:    vmsleu.vv v0, v9, v10
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV64-NEXT:    vmv.v.i v9, 0
; LMULMAX1-RV64-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vslideup.vi v8, v9, 2
; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-RV64-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i64_ne_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsne.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
  %cmp = icmp ne <4 x i64> %c, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i1> %cmp
}
declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)