; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV32I
; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV64I
; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV32F
; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV64F
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV32D
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV64D
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
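
; This file tests lowering of llvm.cttz on fixed-length vectors under three
; feature sets, as the check blocks below show. With integer-only vector
; extensions (the RVI configurations), cttz(x) is expanded as
; popcount(~x & (x - 1)) using the usual SWAR bit tricks. With a vector FPU
; (the RVF and RVD configurations), the lowest set bit is isolated as
; x & -x, converted to float, and the count is read out of the exponent
; field: shift right by 23 and subtract the bias 127 for f32, or by 52 and
; 1023 for f64. For example, with x = 8, x & -x = 8, and 8.0 in f32 has
; biased exponent 130, so 130 - 127 = 3 = cttz(8). With Zvbb, a single
; vctz.v instruction suffices.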

define void @cttz_v16i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v16i8:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RVI-NEXT:    vle8.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v9, v8, a1
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    li a1, 85
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    li a1, 51
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vi v8, v8, 15
; RVI-NEXT:    vse8.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_v16i8:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RVF-NEXT:    vle8.v v8, (a0)
; RVF-NEXT:    vrsub.vi v9, v8, 0
; RVF-NEXT:    vand.vv v9, v8, v9
; RVF-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RVF-NEXT:    vzext.vf2 v10, v9
; RVF-NEXT:    vfwcvt.f.xu.v v12, v10
; RVF-NEXT:    vnsrl.wi v10, v12, 23
; RVF-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RVF-NEXT:    vnsrl.wi v9, v10, 0
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vmseq.vi v0, v8, 0
; RVF-NEXT:    vsub.vx v8, v9, a1
; RVF-NEXT:    vmerge.vim v8, v8, 8, v0
; RVF-NEXT:    vse8.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_v16i8:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RVD-NEXT:    vle8.v v8, (a0)
; RVD-NEXT:    vrsub.vi v9, v8, 0
; RVD-NEXT:    vand.vv v9, v8, v9
; RVD-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RVD-NEXT:    vzext.vf2 v10, v9
; RVD-NEXT:    vfwcvt.f.xu.v v12, v10
; RVD-NEXT:    vnsrl.wi v10, v12, 23
; RVD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RVD-NEXT:    vnsrl.wi v9, v10, 0
; RVD-NEXT:    li a1, 127
; RVD-NEXT:    vmseq.vi v0, v8, 0
; RVD-NEXT:    vsub.vx v8, v9, a1
; RVD-NEXT:    vmerge.vim v8, v8, 8, v0
; RVD-NEXT:    vse8.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_v16i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
  store <16 x i8> %c, ptr %x
  ret void
}
declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)
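
; For e16 elements, a single vfwcvt.f.xu.v to f32 plus one narrowing shift
; recovers the exponent, unlike the e8 case above, which first widens to e16
; with vzext.vf2 because vfwcvt only doubles the element width, so there is
; no direct i8-to-f32 vector conversion.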

define void @cttz_v8i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v8i16:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVI-NEXT:    vle16.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v9, v8, a1
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    lui a1, 5
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    lui a1, 3
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    lui a1, 1
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    li a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 8
; RVI-NEXT:    vse16.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_v8i16:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVF-NEXT:    vle16.v v8, (a0)
; RVF-NEXT:    vrsub.vi v9, v8, 0
; RVF-NEXT:    vand.vv v9, v8, v9
; RVF-NEXT:    vfwcvt.f.xu.v v10, v9
; RVF-NEXT:    vnsrl.wi v9, v10, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsub.vx v9, v9, a1
; RVF-NEXT:    vmseq.vi v0, v8, 0
; RVF-NEXT:    li a1, 16
; RVF-NEXT:    vmerge.vxm v8, v9, a1, v0
; RVF-NEXT:    vse16.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_v8i16:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVD-NEXT:    vle16.v v8, (a0)
; RVD-NEXT:    vrsub.vi v9, v8, 0
; RVD-NEXT:    vand.vv v9, v8, v9
; RVD-NEXT:    vfwcvt.f.xu.v v10, v9
; RVD-NEXT:    vnsrl.wi v9, v10, 23
; RVD-NEXT:    li a1, 127
; RVD-NEXT:    vsub.vx v9, v9, a1
; RVD-NEXT:    vmseq.vi v0, v8, 0
; RVD-NEXT:    li a1, 16
; RVD-NEXT:    vmerge.vxm v8, v9, a1, v0
; RVD-NEXT:    vse16.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_v8i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
  store <8 x i16> %c, ptr %x
  ret void
}
declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)

define void @cttz_v4i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v4i32:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVI-NEXT:    vle32.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v9, v8, a1
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    lui a1, 349525
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    lui a1, 209715
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    lui a1, 61681
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 4112
; RVI-NEXT:    addi a1, a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 24
; RVI-NEXT:    vse32.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_v4i32:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVF-NEXT:    vle32.v v8, (a0)
; RVF-NEXT:    vrsub.vi v9, v8, 0
; RVF-NEXT:    vand.vv v9, v8, v9
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfcvt.f.xu.v v9, v9
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v9, v9, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsub.vx v9, v9, a1
; RVF-NEXT:    vmseq.vi v0, v8, 0
; RVF-NEXT:    li a1, 32
; RVF-NEXT:    vmerge.vxm v8, v9, a1, v0
; RVF-NEXT:    vse32.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_v4i32:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVD-NEXT:    vle32.v v8, (a0)
; RVD-NEXT:    vrsub.vi v9, v8, 0
; RVD-NEXT:    vand.vv v9, v8, v9
; RVD-NEXT:    vfwcvt.f.xu.v v10, v9
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vnsrl.wx v9, v10, a1
; RVD-NEXT:    li a1, 1023
; RVD-NEXT:    vsub.vx v9, v9, a1
; RVD-NEXT:    vmseq.vi v0, v8, 0
; RVD-NEXT:    li a1, 32
; RVD-NEXT:    vmerge.vxm v8, v9, a1, v0
; RVD-NEXT:    vse32.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_v4i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
  store <4 x i32> %c, ptr %x
  ret void
}
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
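
; For i64 elements the bit-trick constants are 64 bits wide. RV64
; materializes them in scalar registers (lui/addiw, then slli/add to copy
; the low half into the high half), while RV32 has no 64-bit scalars and
; instead splats the 32-bit pattern at e32 with vmv.v.x before operating
; back at e64.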

define void @cttz_v2i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: cttz_v2i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vle64.v v8, (a0)
; RV32I-NEXT:    li a1, 1
; RV32I-NEXT:    vsub.vx v9, v8, a1
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a1, 349525
; RV32I-NEXT:    addi a1, a1, 1365
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vand.vv v9, v9, v10
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a1, 209715
; RV32I-NEXT:    addi a1, a1, 819
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vand.vv v10, v8, v9
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vv v8, v8, v9
; RV32I-NEXT:    vadd.vv v8, v10, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a1, 61681
; RV32I-NEXT:    addi a1, a1, -241
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vand.vv v8, v8, v9
; RV32I-NEXT:    lui a1, 4112
; RV32I-NEXT:    addi a1, a1, 257
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vmul.vv v8, v8, v9
; RV32I-NEXT:    li a1, 56
; RV32I-NEXT:    vsrl.vx v8, v8, a1
; RV32I-NEXT:    vse64.v v8, (a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_v2i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64I-NEXT:    vle64.v v8, (a0)
; RV64I-NEXT:    li a1, 1
; RV64I-NEXT:    vsub.vx v9, v8, a1
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a1, 349525
; RV64I-NEXT:    addiw a1, a1, 1365
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    vand.vx v9, v9, a1
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a1, 209715
; RV64I-NEXT:    addiw a1, a1, 819
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    vand.vx v9, v8, a1
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a1
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a1, 61681
; RV64I-NEXT:    addiw a1, a1, -241
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    vand.vx v8, v8, a1
; RV64I-NEXT:    lui a1, 4112
; RV64I-NEXT:    addiw a1, a1, 257
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    vmul.vx v8, v8, a1
; RV64I-NEXT:    li a1, 56
; RV64I-NEXT:    vsrl.vx v8, v8, a1
; RV64I-NEXT:    vse64.v v8, (a0)
; RV64I-NEXT:    ret
;
; RVF-LABEL: cttz_v2i64:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RVF-NEXT:    vle64.v v8, (a0)
; RVF-NEXT:    vrsub.vi v9, v8, 0
; RVF-NEXT:    vand.vv v9, v8, v9
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; RVF-NEXT:    vfncvt.f.xu.w v10, v9
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v9, v10, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vwsubu.vx v10, v9, a1
; RVF-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RVF-NEXT:    vmseq.vi v0, v8, 0
; RVF-NEXT:    li a1, 64
; RVF-NEXT:    vmerge.vxm v8, v10, a1, v0
; RVF-NEXT:    vse64.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_v2i64:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RVD-NEXT:    vle64.v v8, (a0)
; RVD-NEXT:    vrsub.vi v9, v8, 0
; RVD-NEXT:    vand.vv v9, v8, v9
; RVD-NEXT:    fsrmi a1, 1
; RVD-NEXT:    vfcvt.f.xu.v v9, v9
; RVD-NEXT:    fsrm a1
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vsrl.vx v9, v9, a1
; RVD-NEXT:    li a1, 1023
; RVD-NEXT:    vsub.vx v9, v9, a1
; RVD-NEXT:    vmseq.vi v0, v8, 0
; RVD-NEXT:    li a1, 64
; RVD-NEXT:    vmerge.vxm v8, v9, a1, v0
; RVD-NEXT:    vse64.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_v2i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
  store <2 x i64> %c, ptr %x
  ret void
}
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
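
; The 256-bit cases below do not fit in a single 128-bit register, so they
; use LMUL=2 register groups, and the v32i8 case needs its AVL of 32 in a
; scalar register because vsetivli only encodes immediates up to 31.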

define void @cttz_v32i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v32i8:
; RVI:       # %bb.0:
; RVI-NEXT:    li a1, 32
; RVI-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RVI-NEXT:    vle8.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v10, v8, a1
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    li a1, 85
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    li a1, 51
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vi v8, v8, 15
; RVI-NEXT:    vse8.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_v32i8:
; RVF:       # %bb.0:
; RVF-NEXT:    li a1, 32
; RVF-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RVF-NEXT:    vle8.v v8, (a0)
; RVF-NEXT:    vrsub.vi v10, v8, 0
; RVF-NEXT:    vand.vv v10, v8, v10
; RVF-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; RVF-NEXT:    vzext.vf2 v12, v10
; RVF-NEXT:    vfwcvt.f.xu.v v16, v12
; RVF-NEXT:    vnsrl.wi v12, v16, 23
; RVF-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RVF-NEXT:    vnsrl.wi v10, v12, 0
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vmseq.vi v0, v8, 0
; RVF-NEXT:    vsub.vx v8, v10, a1
; RVF-NEXT:    vmerge.vim v8, v8, 8, v0
; RVF-NEXT:    vse8.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_v32i8:
; RVD:       # %bb.0:
; RVD-NEXT:    li a1, 32
; RVD-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RVD-NEXT:    vle8.v v8, (a0)
; RVD-NEXT:    vrsub.vi v10, v8, 0
; RVD-NEXT:    vand.vv v10, v8, v10
; RVD-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; RVD-NEXT:    vzext.vf2 v12, v10
; RVD-NEXT:    vfwcvt.f.xu.v v16, v12
; RVD-NEXT:    vnsrl.wi v12, v16, 23
; RVD-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RVD-NEXT:    vnsrl.wi v10, v12, 0
; RVD-NEXT:    li a1, 127
; RVD-NEXT:    vmseq.vi v0, v8, 0
; RVD-NEXT:    vsub.vx v8, v10, a1
; RVD-NEXT:    vmerge.vim v8, v8, 8, v0
; RVD-NEXT:    vse8.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_v32i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a1, 32
; ZVBB-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
  store <32 x i8> %c, ptr %x
  ret void
}
declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)

define void @cttz_v16i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v16i16:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVI-NEXT:    vle16.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v10, v8, a1
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    lui a1, 5
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    lui a1, 3
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    lui a1, 1
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    li a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 8
; RVI-NEXT:    vse16.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_v16i16:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVF-NEXT:    vle16.v v8, (a0)
; RVF-NEXT:    vrsub.vi v10, v8, 0
; RVF-NEXT:    vand.vv v10, v8, v10
; RVF-NEXT:    vfwcvt.f.xu.v v12, v10
; RVF-NEXT:    vnsrl.wi v10, v12, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsub.vx v10, v10, a1
; RVF-NEXT:    vmseq.vi v0, v8, 0
; RVF-NEXT:    li a1, 16
; RVF-NEXT:    vmerge.vxm v8, v10, a1, v0
; RVF-NEXT:    vse16.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_v16i16:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVD-NEXT:    vle16.v v8, (a0)
; RVD-NEXT:    vrsub.vi v10, v8, 0
; RVD-NEXT:    vand.vv v10, v8, v10
; RVD-NEXT:    vfwcvt.f.xu.v v12, v10
; RVD-NEXT:    vnsrl.wi v10, v12, 23
; RVD-NEXT:    li a1, 127
; RVD-NEXT:    vsub.vx v10, v10, a1
; RVD-NEXT:    vmseq.vi v0, v8, 0
; RVD-NEXT:    li a1, 16
; RVD-NEXT:    vmerge.vxm v8, v10, a1, v0
; RVD-NEXT:    vse16.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_v16i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
  store <16 x i16> %c, ptr %x
  ret void
}
declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)

define void @cttz_v8i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v8i32:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVI-NEXT:    vle32.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v10, v8, a1
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    lui a1, 349525
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    lui a1, 209715
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    lui a1, 61681
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 4112
; RVI-NEXT:    addi a1, a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 24
; RVI-NEXT:    vse32.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_v8i32:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVF-NEXT:    vle32.v v8, (a0)
; RVF-NEXT:    vrsub.vi v10, v8, 0
; RVF-NEXT:    vand.vv v10, v8, v10
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfcvt.f.xu.v v10, v10
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v10, v10, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsub.vx v10, v10, a1
; RVF-NEXT:    vmseq.vi v0, v8, 0
; RVF-NEXT:    li a1, 32
; RVF-NEXT:    vmerge.vxm v8, v10, a1, v0
; RVF-NEXT:    vse32.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_v8i32:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVD-NEXT:    vle32.v v8, (a0)
; RVD-NEXT:    vrsub.vi v10, v8, 0
; RVD-NEXT:    vand.vv v10, v8, v10
; RVD-NEXT:    vfwcvt.f.xu.v v12, v10
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vnsrl.wx v10, v12, a1
; RVD-NEXT:    li a1, 1023
; RVD-NEXT:    vsub.vx v10, v10, a1
; RVD-NEXT:    vmseq.vi v0, v8, 0
; RVD-NEXT:    li a1, 32
; RVD-NEXT:    vmerge.vxm v8, v10, a1, v0
; RVD-NEXT:    vse32.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_v8i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
  store <8 x i32> %c, ptr %x
  ret void
}
declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)

define void @cttz_v4i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: cttz_v4i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vle64.v v8, (a0)
; RV32I-NEXT:    li a1, 1
; RV32I-NEXT:    vsub.vx v10, v8, a1
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    lui a1, 349525
; RV32I-NEXT:    addi a1, a1, 1365
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v12, a1
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vand.vv v10, v10, v12
; RV32I-NEXT:    vsub.vv v8, v8, v10
; RV32I-NEXT:    lui a1, 209715
; RV32I-NEXT:    addi a1, a1, 819
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vand.vv v12, v8, v10
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vadd.vv v8, v12, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v10
; RV32I-NEXT:    lui a1, 61681
; RV32I-NEXT:    addi a1, a1, -241
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    lui a1, 4112
; RV32I-NEXT:    addi a1, a1, 257
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vmul.vv v8, v8, v10
; RV32I-NEXT:    li a1, 56
; RV32I-NEXT:    vsrl.vx v8, v8, a1
; RV32I-NEXT:    vse64.v v8, (a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_v4i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64I-NEXT:    vle64.v v8, (a0)
; RV64I-NEXT:    li a1, 1
; RV64I-NEXT:    vsub.vx v10, v8, a1
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    lui a1, 349525
; RV64I-NEXT:    addiw a1, a1, 1365
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    vand.vx v10, v10, a1
; RV64I-NEXT:    vsub.vv v8, v8, v10
; RV64I-NEXT:    lui a1, 209715
; RV64I-NEXT:    addiw a1, a1, 819
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    vand.vx v10, v8, a1
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a1
; RV64I-NEXT:    vadd.vv v8, v10, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v10
; RV64I-NEXT:    lui a1, 61681
; RV64I-NEXT:    addiw a1, a1, -241
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    vand.vx v8, v8, a1
; RV64I-NEXT:    lui a1, 4112
; RV64I-NEXT:    addiw a1, a1, 257
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    vmul.vx v8, v8, a1
; RV64I-NEXT:    li a1, 56
; RV64I-NEXT:    vsrl.vx v8, v8, a1
; RV64I-NEXT:    vse64.v v8, (a0)
; RV64I-NEXT:    ret
;
; RVF-LABEL: cttz_v4i64:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RVF-NEXT:    vle64.v v8, (a0)
; RVF-NEXT:    vrsub.vi v10, v8, 0
; RVF-NEXT:    vand.vv v10, v8, v10
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RVF-NEXT:    vfncvt.f.xu.w v12, v10
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v10, v12, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vwsubu.vx v12, v10, a1
; RVF-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RVF-NEXT:    vmseq.vi v0, v8, 0
; RVF-NEXT:    li a1, 64
; RVF-NEXT:    vmerge.vxm v8, v12, a1, v0
; RVF-NEXT:    vse64.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_v4i64:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RVD-NEXT:    vle64.v v8, (a0)
; RVD-NEXT:    vrsub.vi v10, v8, 0
; RVD-NEXT:    vand.vv v10, v8, v10
; RVD-NEXT:    fsrmi a1, 1
; RVD-NEXT:    vfcvt.f.xu.v v10, v10
; RVD-NEXT:    fsrm a1
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vsrl.vx v10, v10, a1
; RVD-NEXT:    li a1, 1023
; RVD-NEXT:    vsub.vx v10, v10, a1
; RVD-NEXT:    vmseq.vi v0, v8, 0
; RVD-NEXT:    li a1, 64
; RVD-NEXT:    vmerge.vxm v8, v10, a1, v0
; RVD-NEXT:    vse64.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_v4i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
  store <4 x i64> %c, ptr %x
  ret void
}
declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1)
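
; The remaining tests pass i1 true to llvm.cttz, so the result is poison
; for a zero input and the vmseq.vi/vmerge zero fixup disappears from the
; RVF and RVD lowerings. The RVI and Zvbb code is unchanged, since
; popcount(~0 & (0 - 1)) and vctz.v already return the bit width for zero.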

define void @cttz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v16i8:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RVI-NEXT:    vle8.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v9, v8, a1
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    li a1, 85
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    li a1, 51
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vi v8, v8, 15
; RVI-NEXT:    vse8.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_zero_undef_v16i8:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RVF-NEXT:    vle8.v v8, (a0)
; RVF-NEXT:    vrsub.vi v9, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v9
; RVF-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RVF-NEXT:    vzext.vf2 v10, v8
; RVF-NEXT:    vfwcvt.f.xu.v v12, v10
; RVF-NEXT:    vnsrl.wi v8, v12, 23
; RVF-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RVF-NEXT:    vnsrl.wi v10, v8, 0
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsub.vx v8, v10, a1
; RVF-NEXT:    vse8.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_zero_undef_v16i8:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RVD-NEXT:    vle8.v v8, (a0)
; RVD-NEXT:    vrsub.vi v9, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v9
; RVD-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RVD-NEXT:    vzext.vf2 v10, v8
; RVD-NEXT:    vfwcvt.f.xu.v v12, v10
; RVD-NEXT:    vnsrl.wi v8, v12, 23
; RVD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RVD-NEXT:    vnsrl.wi v10, v8, 0
; RVD-NEXT:    li a1, 127
; RVD-NEXT:    vsub.vx v8, v10, a1
; RVD-NEXT:    vse8.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_zero_undef_v16i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
  store <16 x i8> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v8i16:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVI-NEXT:    vle16.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v9, v8, a1
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    lui a1, 5
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    lui a1, 3
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    lui a1, 1
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    li a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 8
; RVI-NEXT:    vse16.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_zero_undef_v8i16:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVF-NEXT:    vle16.v v8, (a0)
; RVF-NEXT:    vrsub.vi v9, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v9
; RVF-NEXT:    vfwcvt.f.xu.v v10, v8
; RVF-NEXT:    vnsrl.wi v8, v10, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsub.vx v8, v8, a1
; RVF-NEXT:    vse16.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_zero_undef_v8i16:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVD-NEXT:    vle16.v v8, (a0)
; RVD-NEXT:    vrsub.vi v9, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v9
; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
; RVD-NEXT:    vnsrl.wi v8, v10, 23
; RVD-NEXT:    li a1, 127
; RVD-NEXT:    vsub.vx v8, v8, a1
; RVD-NEXT:    vse16.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_zero_undef_v8i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
  store <8 x i16> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v4i32:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVI-NEXT:    vle32.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v9, v8, a1
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    lui a1, 349525
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    lui a1, 209715
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    lui a1, 61681
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 4112
; RVI-NEXT:    addi a1, a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 24
; RVI-NEXT:    vse32.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_zero_undef_v4i32:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVF-NEXT:    vle32.v v8, (a0)
; RVF-NEXT:    vrsub.vi v9, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v9
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfcvt.f.xu.v v8, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v8, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsub.vx v8, v8, a1
; RVF-NEXT:    vse32.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_zero_undef_v4i32:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVD-NEXT:    vle32.v v8, (a0)
; RVD-NEXT:    vrsub.vi v9, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v9
; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vnsrl.wx v8, v10, a1
; RVD-NEXT:    li a1, 1023
; RVD-NEXT:    vsub.vx v8, v8, a1
; RVD-NEXT:    vse32.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_zero_undef_v4i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
  store <4 x i32> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: cttz_zero_undef_v2i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vle64.v v8, (a0)
; RV32I-NEXT:    li a1, 1
; RV32I-NEXT:    vsub.vx v9, v8, a1
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a1, 349525
; RV32I-NEXT:    addi a1, a1, 1365
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vand.vv v9, v9, v10
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a1, 209715
; RV32I-NEXT:    addi a1, a1, 819
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vand.vv v10, v8, v9
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vv v8, v8, v9
; RV32I-NEXT:    vadd.vv v8, v10, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a1, 61681
; RV32I-NEXT:    addi a1, a1, -241
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vand.vv v8, v8, v9
; RV32I-NEXT:    lui a1, 4112
; RV32I-NEXT:    addi a1, a1, 257
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vmul.vv v8, v8, v9
; RV32I-NEXT:    li a1, 56
; RV32I-NEXT:    vsrl.vx v8, v8, a1
; RV32I-NEXT:    vse64.v v8, (a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_zero_undef_v2i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64I-NEXT:    vle64.v v8, (a0)
; RV64I-NEXT:    li a1, 1
; RV64I-NEXT:    vsub.vx v9, v8, a1
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a1, 349525
; RV64I-NEXT:    addiw a1, a1, 1365
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    vand.vx v9, v9, a1
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a1, 209715
; RV64I-NEXT:    addiw a1, a1, 819
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    vand.vx v9, v8, a1
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a1
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a1, 61681
; RV64I-NEXT:    addiw a1, a1, -241
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    vand.vx v8, v8, a1
; RV64I-NEXT:    lui a1, 4112
; RV64I-NEXT:    addiw a1, a1, 257
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    vmul.vx v8, v8, a1
; RV64I-NEXT:    li a1, 56
; RV64I-NEXT:    vsrl.vx v8, v8, a1
; RV64I-NEXT:    vse64.v v8, (a0)
; RV64I-NEXT:    ret
;
; RVF-LABEL: cttz_zero_undef_v2i64:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RVF-NEXT:    vle64.v v8, (a0)
; RVF-NEXT:    vrsub.vi v9, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v9
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; RVF-NEXT:    vfncvt.f.xu.w v9, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v9, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vwsubu.vx v9, v8, a1
; RVF-NEXT:    vse64.v v9, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_zero_undef_v2i64:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RVD-NEXT:    vle64.v v8, (a0)
; RVD-NEXT:    vrsub.vi v9, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v9
; RVD-NEXT:    fsrmi a1, 1
; RVD-NEXT:    vfcvt.f.xu.v v8, v8
; RVD-NEXT:    fsrm a1
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vsrl.vx v8, v8, a1
; RVD-NEXT:    li a1, 1023
; RVD-NEXT:    vsub.vx v8, v8, a1
; RVD-NEXT:    vse64.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_zero_undef_v2i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
  store <2 x i64> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v32i8:
; RVI:       # %bb.0:
; RVI-NEXT:    li a1, 32
; RVI-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RVI-NEXT:    vle8.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v10, v8, a1
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    li a1, 85
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    li a1, 51
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vi v8, v8, 15
; RVI-NEXT:    vse8.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_zero_undef_v32i8:
; RVF:       # %bb.0:
; RVF-NEXT:    li a1, 32
; RVF-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RVF-NEXT:    vle8.v v8, (a0)
; RVF-NEXT:    vrsub.vi v10, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v10
; RVF-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; RVF-NEXT:    vzext.vf2 v12, v8
; RVF-NEXT:    vfwcvt.f.xu.v v16, v12
; RVF-NEXT:    vnsrl.wi v8, v16, 23
; RVF-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RVF-NEXT:    vnsrl.wi v12, v8, 0
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsub.vx v8, v12, a1
; RVF-NEXT:    vse8.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_zero_undef_v32i8:
; RVD:       # %bb.0:
; RVD-NEXT:    li a1, 32
; RVD-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RVD-NEXT:    vle8.v v8, (a0)
; RVD-NEXT:    vrsub.vi v10, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v10
; RVD-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; RVD-NEXT:    vzext.vf2 v12, v8
; RVD-NEXT:    vfwcvt.f.xu.v v16, v12
; RVD-NEXT:    vnsrl.wi v8, v16, 23
; RVD-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RVD-NEXT:    vnsrl.wi v12, v8, 0
; RVD-NEXT:    li a1, 127
; RVD-NEXT:    vsub.vx v8, v12, a1
; RVD-NEXT:    vse8.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_zero_undef_v32i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a1, 32
; ZVBB-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
  store <32 x i8> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v16i16:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVI-NEXT:    vle16.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v10, v8, a1
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    lui a1, 5
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    lui a1, 3
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    lui a1, 1
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    li a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 8
; RVI-NEXT:    vse16.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_zero_undef_v16i16:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVF-NEXT:    vle16.v v8, (a0)
; RVF-NEXT:    vrsub.vi v10, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v10
; RVF-NEXT:    vfwcvt.f.xu.v v12, v8
; RVF-NEXT:    vnsrl.wi v8, v12, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsub.vx v8, v8, a1
; RVF-NEXT:    vse16.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_zero_undef_v16i16:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVD-NEXT:    vle16.v v8, (a0)
; RVD-NEXT:    vrsub.vi v10, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v10
; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
; RVD-NEXT:    vnsrl.wi v8, v12, 23
; RVD-NEXT:    li a1, 127
; RVD-NEXT:    vsub.vx v8, v8, a1
; RVD-NEXT:    vse16.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_zero_undef_v16i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
  store <16 x i16> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v8i32:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVI-NEXT:    vle32.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v10, v8, a1
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    lui a1, 349525
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    lui a1, 209715
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    lui a1, 61681
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 4112
; RVI-NEXT:    addi a1, a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 24
; RVI-NEXT:    vse32.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_zero_undef_v8i32:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVF-NEXT:    vle32.v v8, (a0)
; RVF-NEXT:    vrsub.vi v10, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v10
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfcvt.f.xu.v v8, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v8, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsub.vx v8, v8, a1
; RVF-NEXT:    vse32.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_zero_undef_v8i32:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVD-NEXT:    vle32.v v8, (a0)
; RVD-NEXT:    vrsub.vi v10, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v10
; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vnsrl.wx v8, v12, a1
; RVD-NEXT:    li a1, 1023
; RVD-NEXT:    vsub.vx v8, v8, a1
; RVD-NEXT:    vse32.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_zero_undef_v8i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
  store <8 x i32> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: cttz_zero_undef_v4i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vle64.v v8, (a0)
; RV32I-NEXT:    li a1, 1
; RV32I-NEXT:    vsub.vx v10, v8, a1
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    lui a1, 349525
; RV32I-NEXT:    addi a1, a1, 1365
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v12, a1
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vand.vv v10, v10, v12
; RV32I-NEXT:    vsub.vv v8, v8, v10
; RV32I-NEXT:    lui a1, 209715
; RV32I-NEXT:    addi a1, a1, 819
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vand.vv v12, v8, v10
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vadd.vv v8, v12, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v10
; RV32I-NEXT:    lui a1, 61681
; RV32I-NEXT:    addi a1, a1, -241
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    lui a1, 4112
; RV32I-NEXT:    addi a1, a1, 257
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vmul.vv v8, v8, v10
; RV32I-NEXT:    li a1, 56
; RV32I-NEXT:    vsrl.vx v8, v8, a1
; RV32I-NEXT:    vse64.v v8, (a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_zero_undef_v4i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64I-NEXT:    vle64.v v8, (a0)
; RV64I-NEXT:    li a1, 1
; RV64I-NEXT:    vsub.vx v10, v8, a1
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    lui a1, 349525
; RV64I-NEXT:    addiw a1, a1, 1365
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    vand.vx v10, v10, a1
; RV64I-NEXT:    vsub.vv v8, v8, v10
; RV64I-NEXT:    lui a1, 209715
; RV64I-NEXT:    addiw a1, a1, 819
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    vand.vx v10, v8, a1
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a1
; RV64I-NEXT:    vadd.vv v8, v10, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v10
; RV64I-NEXT:    lui a1, 61681
; RV64I-NEXT:    addiw a1, a1, -241
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    vand.vx v8, v8, a1
; RV64I-NEXT:    lui a1, 4112
; RV64I-NEXT:    addiw a1, a1, 257
; RV64I-NEXT:    slli a2, a1, 32
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    vmul.vx v8, v8, a1
; RV64I-NEXT:    li a1, 56
; RV64I-NEXT:    vsrl.vx v8, v8, a1
; RV64I-NEXT:    vse64.v v8, (a0)
; RV64I-NEXT:    ret
;
; RVF-LABEL: cttz_zero_undef_v4i64:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RVF-NEXT:    vle64.v v8, (a0)
; RVF-NEXT:    vrsub.vi v10, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v10
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RVF-NEXT:    vfncvt.f.xu.w v10, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v10, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vwsubu.vx v10, v8, a1
; RVF-NEXT:    vse64.v v10, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_zero_undef_v4i64:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RVD-NEXT:    vle64.v v8, (a0)
; RVD-NEXT:    vrsub.vi v10, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v10
; RVD-NEXT:    fsrmi a1, 1
; RVD-NEXT:    vfcvt.f.xu.v v8, v8
; RVD-NEXT:    fsrm a1
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vsrl.vx v8, v8, a1
; RVD-NEXT:    li a1, 1023
; RVD-NEXT:    vsub.vx v8, v8, a1
; RVD-NEXT:    vse64.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_zero_undef_v4i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
  store <4 x i64> %c, ptr %x
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: