; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
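
; Explanatory note (not from the generator): without Zvbb, llvm.ctpop.* is
; expanded into the classic SWAR bit-counting sequence
;   x = x - ((x >> 1) & 0x55...);
;   x = (x & 0x33...) + ((x >> 2) & 0x33...);
;   x = (x + (x >> 4)) & 0x0f...;
;   x = (x * 0x0101...) >> (bits - 8);   (the multiply is skipped for e8)
; which is what the CHECK/LMULMAX* lines below spell out. The lui/addi pairs
; materialize the masks, e.g. "lui a1, 349525" + "addi a1, a1, 1365" is
; (0x55555 << 12) + 0x555 = 0x55555555. With Zvbb the whole expansion
; collapses into a single vcpop.v.
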
define void @ctpop_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a1, 85
; CHECK-NEXT:    vand.vx v9, v9, a1
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    li a1, 51
; CHECK-NEXT:    vand.vx v9, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v16i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
  store <16 x i8> %c, ptr %x
  ret void
}
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)

define void @ctpop_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a1, 5
; CHECK-NEXT:    addi a1, a1, 1365
; CHECK-NEXT:    vand.vx v9, v9, a1
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    lui a1, 3
; CHECK-NEXT:    addi a1, a1, 819
; CHECK-NEXT:    vand.vx v9, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    lui a1, 1
; CHECK-NEXT:    addi a1, a1, -241
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    li a1, 257
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
  store <8 x i16> %c, ptr %x
  ret void
}
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)

define void @ctpop_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a1, 349525
; CHECK-NEXT:    addi a1, a1, 1365
; CHECK-NEXT:    vand.vx v9, v9, a1
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    lui a1, 209715
; CHECK-NEXT:    addi a1, a1, 819
; CHECK-NEXT:    vand.vx v9, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    lui a1, 61681
; CHECK-NEXT:    addi a1, a1, -241
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    lui a1, 4112
; CHECK-NEXT:    addi a1, a1, 257
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
  store <4 x i32> %c, ptr %x
  ret void
}
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)

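; Explanatory note (not from the generator): for the i64 elements below, RV32
; cannot materialize a 64-bit mask in a scalar register, so the RV32 lowerings
; splat the 32-bit pattern with vmv.v.x at e32 and use the .vv instruction
; forms, while RV64 builds the full 64-bit constant with lui/addiw/slli/add
; and keeps the .vx forms.
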
define void @ctpop_v2i64(ptr %x, ptr %y) {
; LMULMAX2-RV32-LABEL: ctpop_v2i64:
; LMULMAX2-RV32:       # %bb.0:
; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX2-RV32-NEXT:    lui a1, 349525
; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v9, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX2-RV32-NEXT:    vand.vv v9, v10, v9
; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v9
; LMULMAX2-RV32-NEXT:    lui a1, 209715
; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v9, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vand.vv v10, v8, v9
; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v9
; LMULMAX2-RV32-NEXT:    vadd.vv v8, v10, v8
; LMULMAX2-RV32-NEXT:    vsrl.vi v9, v8, 4
; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v9
; LMULMAX2-RV32-NEXT:    lui a1, 61681
; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v9, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v9
; LMULMAX2-RV32-NEXT:    lui a1, 4112
; LMULMAX2-RV32-NEXT:    addi a1, a1, 257
; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v9, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vmul.vv v8, v8, v9
; LMULMAX2-RV32-NEXT:    li a1, 56
; LMULMAX2-RV32-NEXT:    vsrl.vx v8, v8, a1
; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV32-NEXT:    ret
;
; LMULMAX2-RV64-LABEL: ctpop_v2i64:
; LMULMAX2-RV64:       # %bb.0:
; LMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 1
; LMULMAX2-RV64-NEXT:    lui a1, 349525
; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
; LMULMAX2-RV64-NEXT:    add a1, a1, a2
; LMULMAX2-RV64-NEXT:    vand.vx v9, v9, a1
; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v9
; LMULMAX2-RV64-NEXT:    lui a1, 209715
; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
; LMULMAX2-RV64-NEXT:    add a1, a1, a2
; LMULMAX2-RV64-NEXT:    vand.vx v9, v8, a1
; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-RV64-NEXT:    vadd.vv v8, v9, v8
; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 4
; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v9
; LMULMAX2-RV64-NEXT:    lui a1, 61681
; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
; LMULMAX2-RV64-NEXT:    add a1, a1, a2
; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-RV64-NEXT:    lui a1, 4112
; LMULMAX2-RV64-NEXT:    addiw a1, a1, 257
; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
; LMULMAX2-RV64-NEXT:    add a1, a1, a2
; LMULMAX2-RV64-NEXT:    vmul.vx v8, v8, a1
; LMULMAX2-RV64-NEXT:    li a1, 56
; LMULMAX2-RV64-NEXT:    vsrl.vx v8, v8, a1
; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV64-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: ctpop_v2i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    lui a1, 349525
; LMULMAX1-RV32-NEXT:    addi a1, a1, 1365
; LMULMAX1-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a1
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX1-RV32-NEXT:    vand.vv v9, v10, v9
; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v9
; LMULMAX1-RV32-NEXT:    lui a1, 209715
; LMULMAX1-RV32-NEXT:    addi a1, a1, 819
; LMULMAX1-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a1
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vand.vv v10, v8, v9
; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v9
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v8
; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 4
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v9
; LMULMAX1-RV32-NEXT:    lui a1, 61681
; LMULMAX1-RV32-NEXT:    addi a1, a1, -241
; LMULMAX1-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a1
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v9
; LMULMAX1-RV32-NEXT:    lui a1, 4112
; LMULMAX1-RV32-NEXT:    addi a1, a1, 257
; LMULMAX1-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a1
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v9
; LMULMAX1-RV32-NEXT:    li a1, 56
; LMULMAX1-RV32-NEXT:    vsrl.vx v8, v8, a1
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: ctpop_v2i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 1
; LMULMAX1-RV64-NEXT:    lui a1, 349525
; LMULMAX1-RV64-NEXT:    addiw a1, a1, 1365
; LMULMAX1-RV64-NEXT:    slli a2, a1, 32
; LMULMAX1-RV64-NEXT:    add a1, a1, a2
; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a1
; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v9
; LMULMAX1-RV64-NEXT:    lui a1, 209715
; LMULMAX1-RV64-NEXT:    addiw a1, a1, 819
; LMULMAX1-RV64-NEXT:    slli a2, a1, 32
; LMULMAX1-RV64-NEXT:    add a1, a1, a2
; LMULMAX1-RV64-NEXT:    vand.vx v9, v8, a1
; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v9, v8
; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 4
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v9
; LMULMAX1-RV64-NEXT:    lui a1, 61681
; LMULMAX1-RV64-NEXT:    addiw a1, a1, -241
; LMULMAX1-RV64-NEXT:    slli a2, a1, 32
; LMULMAX1-RV64-NEXT:    add a1, a1, a2
; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
; LMULMAX1-RV64-NEXT:    lui a1, 4112
; LMULMAX1-RV64-NEXT:    addiw a1, a1, 257
; LMULMAX1-RV64-NEXT:    slli a2, a1, 32
; LMULMAX1-RV64-NEXT:    add a1, a1, a2
; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a1
; LMULMAX1-RV64-NEXT:    li a1, 56
; LMULMAX1-RV64-NEXT:    vsrl.vx v8, v8, a1
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v2i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  store <2 x i64> %c, ptr %x
  ret void
}
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)

define void @ctpop_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a1, 32
; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX2-NEXT:    li a1, 85
; LMULMAX2-NEXT:    vand.vx v10, v10, a1
; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-NEXT:    li a1, 51
; LMULMAX2-NEXT:    vand.vx v10, v8, a1
; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX2-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    vand.vi v8, v8, 15
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ctpop_v32i8:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vle8.v v8, (a1)
; LMULMAX1-NEXT:    vle8.v v9, (a0)
; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX1-NEXT:    li a2, 85
; LMULMAX1-NEXT:    vand.vx v10, v10, a2
; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
; LMULMAX1-NEXT:    li a3, 51
; LMULMAX1-NEXT:    vand.vx v10, v8, a3
; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX1-NEXT:    vand.vx v8, v8, a3
; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
; LMULMAX1-NEXT:    vand.vi v8, v8, 15
; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
; LMULMAX1-NEXT:    vand.vx v10, v10, a2
; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
; LMULMAX1-NEXT:    vand.vx v10, v9, a3
; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
; LMULMAX1-NEXT:    vand.vx v9, v9, a3
; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
; LMULMAX1-NEXT:    vand.vi v9, v9, 15
; LMULMAX1-NEXT:    vse8.v v9, (a0)
; LMULMAX1-NEXT:    vse8.v v8, (a1)
; LMULMAX1-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v32i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a1, 32
; ZVBB-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
  store <32 x i8> %c, ptr %x
  ret void
}
declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)

define void @ctpop_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX2-NEXT:    lui a1, 5
; LMULMAX2-NEXT:    addi a1, a1, 1365
; LMULMAX2-NEXT:    vand.vx v10, v10, a1
; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-NEXT:    lui a1, 3
; LMULMAX2-NEXT:    addi a1, a1, 819
; LMULMAX2-NEXT:    vand.vx v10, v8, a1
; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX2-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    lui a1, 1
; LMULMAX2-NEXT:    addi a1, a1, -241
; LMULMAX2-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-NEXT:    li a1, 257
; LMULMAX2-NEXT:    vmul.vx v8, v8, a1
; LMULMAX2-NEXT:    vsrl.vi v8, v8, 8
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ctpop_v16i16:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vle16.v v8, (a1)
; LMULMAX1-NEXT:    vle16.v v9, (a0)
; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX1-NEXT:    lui a2, 5
; LMULMAX1-NEXT:    addi a2, a2, 1365
; LMULMAX1-NEXT:    vand.vx v10, v10, a2
; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
; LMULMAX1-NEXT:    lui a3, 3
; LMULMAX1-NEXT:    addi a3, a3, 819
; LMULMAX1-NEXT:    vand.vx v10, v8, a3
; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX1-NEXT:    vand.vx v8, v8, a3
; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
; LMULMAX1-NEXT:    lui a4, 1
; LMULMAX1-NEXT:    addi a4, a4, -241
; LMULMAX1-NEXT:    vand.vx v8, v8, a4
; LMULMAX1-NEXT:    li a5, 257
; LMULMAX1-NEXT:    vmul.vx v8, v8, a5
; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
; LMULMAX1-NEXT:    vand.vx v10, v10, a2
; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
; LMULMAX1-NEXT:    vand.vx v10, v9, a3
; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
; LMULMAX1-NEXT:    vand.vx v9, v9, a3
; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
; LMULMAX1-NEXT:    vand.vx v9, v9, a4
; LMULMAX1-NEXT:    vmul.vx v9, v9, a5
; LMULMAX1-NEXT:    vsrl.vi v9, v9, 8
; LMULMAX1-NEXT:    vse16.v v9, (a0)
; LMULMAX1-NEXT:    vse16.v v8, (a1)
; LMULMAX1-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v16i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
  store <16 x i16> %c, ptr %x
  ret void
}
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)

define void @ctpop_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX2-NEXT:    lui a1, 349525
; LMULMAX2-NEXT:    addi a1, a1, 1365
; LMULMAX2-NEXT:    vand.vx v10, v10, a1
; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-NEXT:    lui a1, 209715
; LMULMAX2-NEXT:    addi a1, a1, 819
; LMULMAX2-NEXT:    vand.vx v10, v8, a1
; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX2-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    lui a1, 61681
; LMULMAX2-NEXT:    addi a1, a1, -241
; LMULMAX2-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-NEXT:    lui a1, 4112
; LMULMAX2-NEXT:    addi a1, a1, 257
; LMULMAX2-NEXT:    vmul.vx v8, v8, a1
; LMULMAX2-NEXT:    vsrl.vi v8, v8, 24
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ctpop_v8i32:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vle32.v v8, (a1)
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX1-NEXT:    lui a2, 349525
; LMULMAX1-NEXT:    addi a2, a2, 1365
; LMULMAX1-NEXT:    vand.vx v10, v10, a2
; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
; LMULMAX1-NEXT:    lui a3, 209715
; LMULMAX1-NEXT:    addi a3, a3, 819
; LMULMAX1-NEXT:    vand.vx v10, v8, a3
; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX1-NEXT:    vand.vx v8, v8, a3
; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
; LMULMAX1-NEXT:    lui a4, 61681
; LMULMAX1-NEXT:    addi a4, a4, -241
; LMULMAX1-NEXT:    vand.vx v8, v8, a4
; LMULMAX1-NEXT:    lui a5, 4112
; LMULMAX1-NEXT:    addi a5, a5, 257
; LMULMAX1-NEXT:    vmul.vx v8, v8, a5
; LMULMAX1-NEXT:    vsrl.vi v8, v8, 24
; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
; LMULMAX1-NEXT:    vand.vx v10, v10, a2
; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
; LMULMAX1-NEXT:    vand.vx v10, v9, a3
; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
; LMULMAX1-NEXT:    vand.vx v9, v9, a3
; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
; LMULMAX1-NEXT:    vand.vx v9, v9, a4
; LMULMAX1-NEXT:    vmul.vx v9, v9, a5
; LMULMAX1-NEXT:    vsrl.vi v9, v9, 24
; LMULMAX1-NEXT:    vse32.v v9, (a0)
; LMULMAX1-NEXT:    vse32.v v8, (a1)
; LMULMAX1-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
  store <8 x i32> %c, ptr %x
  ret void
}
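
; Explanatory note (not from the generator): the comparison tests below check
; that "ctpop(x) < 2", "ctpop(x) > 1", "ctpop(x) == 1" and "ctpop(x) != 1"
; are not lowered through the full popcount. A value has at most one bit set
; exactly when (x & (x - 1)) == 0, so the LMULMAX* lowerings use
; vadd.vi/vand.vv plus a compare against zero, with an extra x != 0 / x == 0
; term for the == 1 and != 1 cases. The ZVBB lowerings still use vcpop.v and
; compare its result directly.
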
define <8 x i1> @ctpop_v8i32_ult_two(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v8i32_ult_two:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
; LMULMAX2-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-NEXT:    vmseq.vi v0, v8, 0
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ctpop_v8i32_ult_two:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vle32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    vadd.vi v10, v8, -1
; LMULMAX1-NEXT:    vand.vv v8, v8, v10
; LMULMAX1-NEXT:    vmseq.vi v0, v8, 0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v8, 0
; LMULMAX1-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vadd.vi v10, v9, -1
; LMULMAX1-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-NEXT:    vmseq.vi v0, v9, 0
; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v9, 0
; LMULMAX1-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i32_ult_two:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsleu.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
  %cmp = icmp ult <8 x i32> %c, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i1> %cmp
}
define <8 x i1> @ctpop_v8i32_ugt_one(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v8i32_ugt_one:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
; LMULMAX2-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ctpop_v8i32_ugt_one:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vle32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    vadd.vi v10, v8, -1
; LMULMAX1-NEXT:    vand.vv v8, v8, v10
; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v8, 0
; LMULMAX1-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vadd.vi v10, v9, -1
; LMULMAX1-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-NEXT:    vmsne.vi v0, v9, 0
; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v9, 0
; LMULMAX1-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i32_ugt_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsgtu.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
  %cmp = icmp ugt <8 x i32> %c, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i1> %cmp
}
define <8 x i1> @ctpop_v8i32_eq_one(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v8i32_eq_one:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
; LMULMAX2-NEXT:    vand.vv v10, v8, v10
; LMULMAX2-NEXT:    vmseq.vi v12, v10, 0
; LMULMAX2-NEXT:    vmsne.vi v10, v8, 0
; LMULMAX2-NEXT:    vmand.mm v0, v10, v12
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ctpop_v8i32_eq_one:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vle32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v10, 0
; LMULMAX1-NEXT:    vmerge.vim v11, v10, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vmsne.vi v0, v9, 0
; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v12, 0
; LMULMAX1-NEXT:    vmerge.vim v13, v12, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vslideup.vi v11, v13, 4
; LMULMAX1-NEXT:    vmsne.vi v11, v11, 0
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vadd.vi v13, v8, -1
; LMULMAX1-NEXT:    vand.vv v8, v8, v13
; LMULMAX1-NEXT:    vmseq.vi v0, v8, 0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vmerge.vim v8, v10, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vadd.vi v10, v9, -1
; LMULMAX1-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-NEXT:    vmseq.vi v0, v9, 0
; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT:    vmerge.vim v9, v12, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
; LMULMAX1-NEXT:    vmsne.vi v8, v8, 0
; LMULMAX1-NEXT:    vmand.mm v0, v11, v8
; LMULMAX1-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i32_eq_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmseq.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
  %cmp = icmp eq <8 x i32> %c, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i1> %cmp
}
define <8 x i1> @ctpop_v8i32_ne_one(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v8i32_ne_one:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
; LMULMAX2-NEXT:    vand.vv v10, v8, v10
; LMULMAX2-NEXT:    vmsne.vi v12, v10, 0
; LMULMAX2-NEXT:    vmseq.vi v10, v8, 0
; LMULMAX2-NEXT:    vmor.mm v0, v10, v12
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ctpop_v8i32_ne_one:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vle32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    vmseq.vi v0, v8, 0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v10, 0
; LMULMAX1-NEXT:    vmerge.vim v11, v10, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vmseq.vi v0, v9, 0
; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v12, 0
; LMULMAX1-NEXT:    vmerge.vim v13, v12, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vslideup.vi v11, v13, 4
; LMULMAX1-NEXT:    vmsne.vi v11, v11, 0
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vadd.vi v13, v8, -1
; LMULMAX1-NEXT:    vand.vv v8, v8, v13
; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vmerge.vim v8, v10, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vadd.vi v10, v9, -1
; LMULMAX1-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-NEXT:    vmsne.vi v0, v9, 0
; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT:    vmerge.vim v9, v12, 1, v0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
; LMULMAX1-NEXT:    vmsne.vi v8, v8, 0
; LMULMAX1-NEXT:    vmor.mm v0, v11, v8
; LMULMAX1-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i32_ne_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsne.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
  %cmp = icmp ne <8 x i32> %c, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i1> %cmp
}
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)

define void @ctpop_v4i64(ptr %x, ptr %y) {
; LMULMAX2-RV32-LABEL: ctpop_v4i64:
; LMULMAX2-RV32:       # %bb.0:
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX2-RV32-NEXT:    lui a1, 349525
; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vsrl.vi v12, v8, 1
; LMULMAX2-RV32-NEXT:    vand.vv v10, v12, v10
; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-RV32-NEXT:    lui a1, 209715
; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vand.vv v12, v8, v10
; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-RV32-NEXT:    vadd.vv v8, v12, v8
; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-RV32-NEXT:    lui a1, 61681
; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-RV32-NEXT:    lui a1, 4112
; LMULMAX2-RV32-NEXT:    addi a1, a1, 257
; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vmul.vv v8, v8, v10
; LMULMAX2-RV32-NEXT:    li a1, 56
; LMULMAX2-RV32-NEXT:    vsrl.vx v8, v8, a1
; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV32-NEXT:    ret
;
; LMULMAX2-RV64-LABEL: ctpop_v4i64:
; LMULMAX2-RV64:       # %bb.0:
; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX2-RV64-NEXT:    lui a1, 349525
; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
; LMULMAX2-RV64-NEXT:    add a1, a1, a2
; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-RV64-NEXT:    lui a1, 209715
; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
; LMULMAX2-RV64-NEXT:    add a1, a1, a2
; LMULMAX2-RV64-NEXT:    vand.vx v10, v8, a1
; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-RV64-NEXT:    vadd.vv v8, v10, v8
; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-RV64-NEXT:    lui a1, 61681
; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
; LMULMAX2-RV64-NEXT:    add a1, a1, a2
; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
; LMULMAX2-RV64-NEXT:    lui a1, 4112
; LMULMAX2-RV64-NEXT:    addiw a1, a1, 257
; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
; LMULMAX2-RV64-NEXT:    add a1, a1, a2
; LMULMAX2-RV64-NEXT:    vmul.vx v8, v8, a1
; LMULMAX2-RV64-NEXT:    li a1, 56
; LMULMAX2-RV64-NEXT:    vsrl.vx v8, v8, a1
; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV64-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: ctpop_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a1)
; LMULMAX1-RV32-NEXT:    lui a2, 349525
; LMULMAX1-RV32-NEXT:    addi a2, a2, 1365
; LMULMAX1-RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.x v10, a2
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v9, 1
; LMULMAX1-RV32-NEXT:    vand.vv v11, v11, v10
; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v11
; LMULMAX1-RV32-NEXT:    lui a2, 209715
; LMULMAX1-RV32-NEXT:    addi a2, a2, 819
; LMULMAX1-RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.x v11, a2
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vand.vv v12, v9, v11
; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 2
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v11
; LMULMAX1-RV32-NEXT:    vadd.vv v9, v12, v9
; LMULMAX1-RV32-NEXT:    vsrl.vi v12, v9, 4
; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v12
; LMULMAX1-RV32-NEXT:    lui a2, 61681
; LMULMAX1-RV32-NEXT:    addi a2, a2, -241
; LMULMAX1-RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.x v12, a2
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v12
; LMULMAX1-RV32-NEXT:    lui a2, 4112
; LMULMAX1-RV32-NEXT:    addi a2, a2, 257
; LMULMAX1-RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.x v13, a2
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmul.vv v9, v9, v13
; LMULMAX1-RV32-NEXT:    li a2, 56
; LMULMAX1-RV32-NEXT:    vsrl.vx v9, v9, a2
; LMULMAX1-RV32-NEXT:    vsrl.vi v14, v8, 1
; LMULMAX1-RV32-NEXT:    vand.vv v10, v14, v10
; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v10
; LMULMAX1-RV32-NEXT:    vand.vv v10, v8, v11
; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v8
; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v10
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v12
; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v13
; LMULMAX1-RV32-NEXT:    vsrl.vx v8, v8, a2
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a1)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: ctpop_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a1)
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
; LMULMAX1-RV64-NEXT:    lui a2, 349525
; LMULMAX1-RV64-NEXT:    addiw a2, a2, 1365
; LMULMAX1-RV64-NEXT:    slli a3, a2, 32
; LMULMAX1-RV64-NEXT:    add a2, a2, a3
; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v10
; LMULMAX1-RV64-NEXT:    lui a3, 209715
; LMULMAX1-RV64-NEXT:    addiw a3, a3, 819
; LMULMAX1-RV64-NEXT:    slli a4, a3, 32
; LMULMAX1-RV64-NEXT:    add a3, a3, a4
; LMULMAX1-RV64-NEXT:    vand.vx v10, v8, a3
; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a3
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v10, v8
; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v10
; LMULMAX1-RV64-NEXT:    lui a4, 61681
; LMULMAX1-RV64-NEXT:    addiw a4, a4, -241
; LMULMAX1-RV64-NEXT:    slli a5, a4, 32
; LMULMAX1-RV64-NEXT:    add a4, a4, a5
; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a4
; LMULMAX1-RV64-NEXT:    lui a5, 4112
; LMULMAX1-RV64-NEXT:    addiw a5, a5, 257
; LMULMAX1-RV64-NEXT:    slli a6, a5, 32
; LMULMAX1-RV64-NEXT:    add a5, a5, a6
; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a5
; LMULMAX1-RV64-NEXT:    li a6, 56
; LMULMAX1-RV64-NEXT:    vsrl.vx v8, v8, a6
; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
; LMULMAX1-RV64-NEXT:    vsub.vv v9, v9, v10
; LMULMAX1-RV64-NEXT:    vand.vx v10, v9, a3
; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 2
; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a3
; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v10
; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a4
; LMULMAX1-RV64-NEXT:    vmul.vx v9, v9, a5
; LMULMAX1-RV64-NEXT:    vsrl.vx v9, v9, a6
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a1)
; LMULMAX1-RV64-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
  store <4 x i64> %c, ptr %x
  ret void
}
define <4 x i1> @ctpop_v4i64_ult_two(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v4i64_ult_two:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
; LMULMAX2-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-NEXT:    vmseq.vi v0, v8, 0
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: ctpop_v4i64_ult_two:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a1)
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v10, -1
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v11, v9, v10
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v11
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v11, 0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmseq.vv v0, v9, v11
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v9, 0
; LMULMAX1-RV32-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v10, v8, v10
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v10
; LMULMAX1-RV32-NEXT:    vmseq.vv v0, v8, v11
; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vslideup.vi v9, v8, 2
; LMULMAX1-RV32-NEXT:    vmsne.vi v0, v9, 0
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: ctpop_v4i64_ult_two:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a0, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV64-NEXT:    vadd.vi v10, v8, -1
; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v10
; LMULMAX1-RV64-NEXT:    vmseq.vi v0, v8, 0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vadd.vi v10, v9, -1
; LMULMAX1-RV64-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-RV64-NEXT:    vmseq.vi v0, v9, 0
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV64-NEXT:    vmv.v.i v9, 0
; LMULMAX1-RV64-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vslideup.vi v8, v9, 2
; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-RV64-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i64_ult_two:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsleu.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
  %cmp = icmp ult <4 x i64> %c, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i1> %cmp
}
define <4 x i1> @ctpop_v4i64_ugt_one(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v4i64_ugt_one:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
; LMULMAX2-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: ctpop_v4i64_ugt_one:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a1)
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v10, -1
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v11, v9, v10
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v11
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v11, 0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmsne.vv v0, v9, v11
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v9, 0
; LMULMAX1-RV32-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v10, v8, v10
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v10
; LMULMAX1-RV32-NEXT:    vmsne.vv v0, v8, v11
; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vslideup.vi v9, v8, 2
; LMULMAX1-RV32-NEXT:    vmsne.vi v0, v9, 0
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: ctpop_v4i64_ugt_one:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a0, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV64-NEXT:    vadd.vi v10, v8, -1
; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v10
; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vadd.vi v10, v9, -1
; LMULMAX1-RV64-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v9, 0
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV64-NEXT:    vmv.v.i v9, 0
; LMULMAX1-RV64-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vslideup.vi v8, v9, 2
; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-RV64-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i64_ugt_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsgtu.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
  %cmp = icmp ugt <4 x i64> %c, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i1> %cmp
}
define <4 x i1> @ctpop_v4i64_eq_one(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v4i64_eq_one:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
; LMULMAX2-NEXT:    vand.vv v10, v8, v10
; LMULMAX2-NEXT:    vmseq.vi v12, v10, 0
; LMULMAX2-NEXT:    vmsne.vi v10, v8, 0
; LMULMAX2-NEXT:    vmand.mm v0, v10, v12
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: ctpop_v4i64_eq_one:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a1)
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v10, 0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmsne.vv v0, v9, v10
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v11, 0
; LMULMAX1-RV32-NEXT:    vmerge.vim v12, v11, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmsne.vv v0, v8, v10
; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v13, 0
; LMULMAX1-RV32-NEXT:    vmerge.vim v14, v13, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vslideup.vi v12, v14, 2
; LMULMAX1-RV32-NEXT:    vmsne.vi v12, v12, 0
; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v14, -1
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v15, v9, v14
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v15
; LMULMAX1-RV32-NEXT:    vmseq.vv v0, v9, v10
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vmerge.vim v9, v11, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v11, v8, v14
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vmseq.vv v0, v8, v10
; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v13, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vslideup.vi v9, v8, 2
; LMULMAX1-RV32-NEXT:    vmsne.vi v8, v9, 0
; LMULMAX1-RV32-NEXT:    vmand.mm v0, v12, v8
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: ctpop_v4i64_eq_one:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a0, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vmv.v.i v10, 0
; LMULMAX1-RV64-NEXT:    vmerge.vim v11, v10, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v9, 0
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV64-NEXT:    vmv.v.i v12, 0
; LMULMAX1-RV64-NEXT:    vmerge.vim v13, v12, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vslideup.vi v11, v13, 2
; LMULMAX1-RV64-NEXT:    vmsne.vi v11, v11, 0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vadd.vi v13, v8, -1
; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v13
; LMULMAX1-RV64-NEXT:    vmseq.vi v0, v8, 0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v10, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vadd.vi v10, v9, -1
; LMULMAX1-RV64-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-RV64-NEXT:    vmseq.vi v0, v9, 0
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV64-NEXT:    vmerge.vim v9, v12, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vslideup.vi v8, v9, 2
; LMULMAX1-RV64-NEXT:    vmsne.vi v8, v8, 0
; LMULMAX1-RV64-NEXT:    vmand.mm v0, v11, v8
; LMULMAX1-RV64-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i64_eq_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmseq.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
  %cmp = icmp eq <4 x i64> %c, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i1> %cmp
}
define <4 x i1> @ctpop_v4i64_ne_one(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ctpop_v4i64_ne_one:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
; LMULMAX2-NEXT:    vand.vv v10, v8, v10
; LMULMAX2-NEXT:    vmsne.vi v12, v10, 0
; LMULMAX2-NEXT:    vmseq.vi v10, v8, 0
; LMULMAX2-NEXT:    vmor.mm v0, v10, v12
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: ctpop_v4i64_ne_one:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a1)
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v10, 0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmseq.vv v0, v9, v10
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v11, 0
; LMULMAX1-RV32-NEXT:    vmerge.vim v12, v11, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmseq.vv v0, v8, v10
; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v13, 0
; LMULMAX1-RV32-NEXT:    vmerge.vim v14, v13, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vslideup.vi v12, v14, 2
; LMULMAX1-RV32-NEXT:    vmsne.vi v12, v12, 0
; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vmv.v.i v14, -1
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v15, v9, v14
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v15
; LMULMAX1-RV32-NEXT:    vmsne.vv v0, v9, v10
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vmerge.vim v9, v11, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v11, v8, v14
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vmsne.vv v0, v8, v10
; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v13, 1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV32-NEXT:    vslideup.vi v9, v8, 2
; LMULMAX1-RV32-NEXT:    vmsne.vi v8, v9, 0
; LMULMAX1-RV32-NEXT:    vmor.mm v0, v12, v8
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: ctpop_v4i64_ne_one:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a0, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV64-NEXT:    vmseq.vi v0, v8, 0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vmv.v.i v10, 0
; LMULMAX1-RV64-NEXT:    vmerge.vim v11, v10, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vmseq.vi v0, v9, 0
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV64-NEXT:    vmv.v.i v12, 0
; LMULMAX1-RV64-NEXT:    vmerge.vim v13, v12, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vslideup.vi v11, v13, 2
; LMULMAX1-RV64-NEXT:    vmsne.vi v11, v11, 0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vadd.vi v13, v8, -1
; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v13
; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v8, 0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v10, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vadd.vi v10, v9, -1
; LMULMAX1-RV64-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v9, 0
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-RV64-NEXT:    vmerge.vim v9, v12, 1, v0
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-RV64-NEXT:    vslideup.vi v8, v9, 2
; LMULMAX1-RV64-NEXT:    vmsne.vi v8, v8, 0
; LMULMAX1-RV64-NEXT:    vmor.mm v0, v11, v8
; LMULMAX1-RV64-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i64_ne_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsne.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
  %cmp = icmp ne <4 x i64> %c, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i1> %cmp
}
declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)