1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32I
3 ; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64I
4 ; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
5 ; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
6 ; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+zvl128b,+f -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32F
7 ; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+zvl128b,+f -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64F
8 ; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32D
9 ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64D
10 ; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
11 ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
12 ; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8
13 ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8
14 ; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
15 ; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
; Test case: llvm.cttz on a fixed-length <16 x i8> vector loaded from %x, with
; the i1 flag operand set to false; the result is stored back to %x.
; Expected lowerings recorded below (autogenerated, do not hand-edit):
;  - shared CHECK prefix: forms ~x & (x - 1) and then runs a shift/mask/add
;    sequence with masks 85, 51 and 15 (looks like a bitwise population count).
;  - LMULMAX8 prefix: isolates the lowest set bit (x & -x), zero-extends to e16,
;    converts to floating point, narrows with a shift by 23, subtracts 127, and
;    merges the constant 8 for lanes where the input equals zero.
;  - ZVBB prefix: a single vctz.v instruction.
17 define void @cttz_v16i8(ptr %x, ptr %y) nounwind {
18 ; CHECK-LABEL: cttz_v16i8:
20 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
21 ; CHECK-NEXT: vle8.v v8, (a0)
22 ; CHECK-NEXT: li a1, 1
23 ; CHECK-NEXT: vsub.vx v9, v8, a1
24 ; CHECK-NEXT: vnot.v v8, v8
25 ; CHECK-NEXT: vand.vv v8, v8, v9
26 ; CHECK-NEXT: vsrl.vi v9, v8, 1
27 ; CHECK-NEXT: li a1, 85
28 ; CHECK-NEXT: vand.vx v9, v9, a1
29 ; CHECK-NEXT: vsub.vv v8, v8, v9
30 ; CHECK-NEXT: li a1, 51
31 ; CHECK-NEXT: vand.vx v9, v8, a1
32 ; CHECK-NEXT: vsrl.vi v8, v8, 2
33 ; CHECK-NEXT: vand.vx v8, v8, a1
34 ; CHECK-NEXT: vadd.vv v8, v9, v8
35 ; CHECK-NEXT: vsrl.vi v9, v8, 4
36 ; CHECK-NEXT: vadd.vv v8, v8, v9
37 ; CHECK-NEXT: vand.vi v8, v8, 15
38 ; CHECK-NEXT: vse8.v v8, (a0)
41 ; LMULMAX8-LABEL: cttz_v16i8:
43 ; LMULMAX8-NEXT: vsetivli zero, 16, e8, m1, ta, ma
44 ; LMULMAX8-NEXT: vle8.v v8, (a0)
45 ; LMULMAX8-NEXT: vrsub.vi v9, v8, 0
46 ; LMULMAX8-NEXT: vand.vv v9, v8, v9
47 ; LMULMAX8-NEXT: vsetvli zero, zero, e16, m2, ta, ma
48 ; LMULMAX8-NEXT: vzext.vf2 v10, v9
49 ; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v10
50 ; LMULMAX8-NEXT: vnsrl.wi v10, v12, 23
51 ; LMULMAX8-NEXT: vsetvli zero, zero, e8, m1, ta, ma
52 ; LMULMAX8-NEXT: vnsrl.wi v9, v10, 0
53 ; LMULMAX8-NEXT: li a1, 127
54 ; LMULMAX8-NEXT: vmseq.vi v0, v8, 0
55 ; LMULMAX8-NEXT: vsub.vx v8, v9, a1
56 ; LMULMAX8-NEXT: vmerge.vim v8, v8, 8, v0
57 ; LMULMAX8-NEXT: vse8.v v8, (a0)
60 ; ZVBB-LABEL: cttz_v16i8:
62 ; ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
63 ; ZVBB-NEXT: vle8.v v8, (a0)
64 ; ZVBB-NEXT: vctz.v v8, v8
65 ; ZVBB-NEXT: vse8.v v8, (a0)
67 %a = load <16 x i8>, ptr %x
; %b is not referenced by the call or store below; none of the expected
; sequences above reads from a1 as an address.
68 %b = load <16 x i8>, ptr %y
69 %c = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
70 store <16 x i8> %c, ptr %x
; Declaration of the intrinsic called above.
73 declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)
; Test case: llvm.cttz on a fixed-length <8 x i16> vector loaded from %x, with
; the i1 flag operand set to false; the result is stored back to %x.
; Expected lowerings recorded below (autogenerated, do not hand-edit):
;  - LMULMAX2-RV32I, LMULMAX2-RV64I and LMULMAX1 prefixes: integer-only
;    sequence on ~x & (x - 1) using lui/addi-built masks, a multiply by 257
;    and a final right shift by 8.
;  - LMULMAX2-RV32F/RV64F/RV32D/RV64D and LMULMAX8 prefixes: isolate the lowest
;    set bit (x & -x), widen-convert to floating point, narrow with a shift by
;    23, subtract 127, and merge the constant 16 for lanes equal to zero.
;  - ZVBB prefix: a single vctz.v instruction.
75 define void @cttz_v8i16(ptr %x, ptr %y) nounwind {
76 ; LMULMAX2-RV32I-LABEL: cttz_v8i16:
77 ; LMULMAX2-RV32I: # %bb.0:
78 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e16, m1, ta, ma
79 ; LMULMAX2-RV32I-NEXT: vle16.v v8, (a0)
80 ; LMULMAX2-RV32I-NEXT: li a1, 1
81 ; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1
82 ; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
83 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
84 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1
85 ; LMULMAX2-RV32I-NEXT: lui a1, 5
86 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
87 ; LMULMAX2-RV32I-NEXT: vand.vx v9, v9, a1
88 ; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9
89 ; LMULMAX2-RV32I-NEXT: lui a1, 3
90 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
91 ; LMULMAX2-RV32I-NEXT: vand.vx v9, v8, a1
92 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
93 ; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
94 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8
95 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4
96 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9
97 ; LMULMAX2-RV32I-NEXT: lui a1, 1
98 ; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
99 ; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
100 ; LMULMAX2-RV32I-NEXT: li a1, 257
101 ; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1
102 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 8
103 ; LMULMAX2-RV32I-NEXT: vse16.v v8, (a0)
104 ; LMULMAX2-RV32I-NEXT: ret
106 ; LMULMAX2-RV64I-LABEL: cttz_v8i16:
107 ; LMULMAX2-RV64I: # %bb.0:
108 ; LMULMAX2-RV64I-NEXT: vsetivli zero, 8, e16, m1, ta, ma
109 ; LMULMAX2-RV64I-NEXT: vle16.v v8, (a0)
110 ; LMULMAX2-RV64I-NEXT: li a1, 1
111 ; LMULMAX2-RV64I-NEXT: vsub.vx v9, v8, a1
112 ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
113 ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9
114 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1
115 ; LMULMAX2-RV64I-NEXT: lui a1, 5
116 ; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
117 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1
118 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9
119 ; LMULMAX2-RV64I-NEXT: lui a1, 3
120 ; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
121 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1
122 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
123 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
124 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8
125 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4
126 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9
127 ; LMULMAX2-RV64I-NEXT: lui a1, 1
128 ; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
129 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
130 ; LMULMAX2-RV64I-NEXT: li a1, 257
131 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
132 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 8
133 ; LMULMAX2-RV64I-NEXT: vse16.v v8, (a0)
134 ; LMULMAX2-RV64I-NEXT: ret
136 ; LMULMAX1-LABEL: cttz_v8i16:
138 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
139 ; LMULMAX1-NEXT: vle16.v v8, (a0)
140 ; LMULMAX1-NEXT: li a1, 1
141 ; LMULMAX1-NEXT: vsub.vx v9, v8, a1
142 ; LMULMAX1-NEXT: vnot.v v8, v8
143 ; LMULMAX1-NEXT: vand.vv v8, v8, v9
144 ; LMULMAX1-NEXT: vsrl.vi v9, v8, 1
145 ; LMULMAX1-NEXT: lui a1, 5
146 ; LMULMAX1-NEXT: addi a1, a1, 1365
147 ; LMULMAX1-NEXT: vand.vx v9, v9, a1
148 ; LMULMAX1-NEXT: vsub.vv v8, v8, v9
149 ; LMULMAX1-NEXT: lui a1, 3
150 ; LMULMAX1-NEXT: addi a1, a1, 819
151 ; LMULMAX1-NEXT: vand.vx v9, v8, a1
152 ; LMULMAX1-NEXT: vsrl.vi v8, v8, 2
153 ; LMULMAX1-NEXT: vand.vx v8, v8, a1
154 ; LMULMAX1-NEXT: vadd.vv v8, v9, v8
155 ; LMULMAX1-NEXT: vsrl.vi v9, v8, 4
156 ; LMULMAX1-NEXT: vadd.vv v8, v8, v9
157 ; LMULMAX1-NEXT: lui a1, 1
158 ; LMULMAX1-NEXT: addi a1, a1, -241
159 ; LMULMAX1-NEXT: vand.vx v8, v8, a1
160 ; LMULMAX1-NEXT: li a1, 257
161 ; LMULMAX1-NEXT: vmul.vx v8, v8, a1
162 ; LMULMAX1-NEXT: vsrl.vi v8, v8, 8
163 ; LMULMAX1-NEXT: vse16.v v8, (a0)
166 ; LMULMAX2-RV32F-LABEL: cttz_v8i16:
167 ; LMULMAX2-RV32F: # %bb.0:
168 ; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
169 ; LMULMAX2-RV32F-NEXT: vle16.v v8, (a0)
170 ; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0
171 ; LMULMAX2-RV32F-NEXT: vand.vv v9, v8, v9
172 ; LMULMAX2-RV32F-NEXT: vfwcvt.f.xu.v v10, v9
173 ; LMULMAX2-RV32F-NEXT: vnsrl.wi v9, v10, 23
174 ; LMULMAX2-RV32F-NEXT: li a1, 127
175 ; LMULMAX2-RV32F-NEXT: vsub.vx v9, v9, a1
176 ; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0
177 ; LMULMAX2-RV32F-NEXT: li a1, 16
178 ; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0
179 ; LMULMAX2-RV32F-NEXT: vse16.v v8, (a0)
180 ; LMULMAX2-RV32F-NEXT: ret
182 ; LMULMAX2-RV64F-LABEL: cttz_v8i16:
183 ; LMULMAX2-RV64F: # %bb.0:
184 ; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
185 ; LMULMAX2-RV64F-NEXT: vle16.v v8, (a0)
186 ; LMULMAX2-RV64F-NEXT: vrsub.vi v9, v8, 0
187 ; LMULMAX2-RV64F-NEXT: vand.vv v9, v8, v9
188 ; LMULMAX2-RV64F-NEXT: vfwcvt.f.xu.v v10, v9
189 ; LMULMAX2-RV64F-NEXT: vnsrl.wi v9, v10, 23
190 ; LMULMAX2-RV64F-NEXT: li a1, 127
191 ; LMULMAX2-RV64F-NEXT: vsub.vx v9, v9, a1
192 ; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
193 ; LMULMAX2-RV64F-NEXT: li a1, 16
194 ; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v9, a1, v0
195 ; LMULMAX2-RV64F-NEXT: vse16.v v8, (a0)
196 ; LMULMAX2-RV64F-NEXT: ret
198 ; LMULMAX2-RV32D-LABEL: cttz_v8i16:
199 ; LMULMAX2-RV32D: # %bb.0:
200 ; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e16, m1, ta, ma
201 ; LMULMAX2-RV32D-NEXT: vle16.v v8, (a0)
202 ; LMULMAX2-RV32D-NEXT: vrsub.vi v9, v8, 0
203 ; LMULMAX2-RV32D-NEXT: vand.vv v9, v8, v9
204 ; LMULMAX2-RV32D-NEXT: vfwcvt.f.xu.v v10, v9
205 ; LMULMAX2-RV32D-NEXT: vnsrl.wi v9, v10, 23
206 ; LMULMAX2-RV32D-NEXT: li a1, 127
207 ; LMULMAX2-RV32D-NEXT: vsub.vx v9, v9, a1
208 ; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0
209 ; LMULMAX2-RV32D-NEXT: li a1, 16
210 ; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v9, a1, v0
211 ; LMULMAX2-RV32D-NEXT: vse16.v v8, (a0)
212 ; LMULMAX2-RV32D-NEXT: ret
214 ; LMULMAX2-RV64D-LABEL: cttz_v8i16:
215 ; LMULMAX2-RV64D: # %bb.0:
216 ; LMULMAX2-RV64D-NEXT: vsetivli zero, 8, e16, m1, ta, ma
217 ; LMULMAX2-RV64D-NEXT: vle16.v v8, (a0)
218 ; LMULMAX2-RV64D-NEXT: vrsub.vi v9, v8, 0
219 ; LMULMAX2-RV64D-NEXT: vand.vv v9, v8, v9
220 ; LMULMAX2-RV64D-NEXT: vfwcvt.f.xu.v v10, v9
221 ; LMULMAX2-RV64D-NEXT: vnsrl.wi v9, v10, 23
222 ; LMULMAX2-RV64D-NEXT: li a1, 127
223 ; LMULMAX2-RV64D-NEXT: vsub.vx v9, v9, a1
224 ; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0
225 ; LMULMAX2-RV64D-NEXT: li a1, 16
226 ; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v9, a1, v0
227 ; LMULMAX2-RV64D-NEXT: vse16.v v8, (a0)
228 ; LMULMAX2-RV64D-NEXT: ret
230 ; LMULMAX8-LABEL: cttz_v8i16:
232 ; LMULMAX8-NEXT: vsetivli zero, 8, e16, m1, ta, ma
233 ; LMULMAX8-NEXT: vle16.v v8, (a0)
234 ; LMULMAX8-NEXT: vrsub.vi v9, v8, 0
235 ; LMULMAX8-NEXT: vand.vv v9, v8, v9
236 ; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v9
237 ; LMULMAX8-NEXT: vnsrl.wi v9, v10, 23
238 ; LMULMAX8-NEXT: li a1, 127
239 ; LMULMAX8-NEXT: vsub.vx v9, v9, a1
240 ; LMULMAX8-NEXT: vmseq.vi v0, v8, 0
241 ; LMULMAX8-NEXT: li a1, 16
242 ; LMULMAX8-NEXT: vmerge.vxm v8, v9, a1, v0
243 ; LMULMAX8-NEXT: vse16.v v8, (a0)
246 ; ZVBB-LABEL: cttz_v8i16:
248 ; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
249 ; ZVBB-NEXT: vle16.v v8, (a0)
250 ; ZVBB-NEXT: vctz.v v8, v8
251 ; ZVBB-NEXT: vse16.v v8, (a0)
253 %a = load <8 x i16>, ptr %x
; %b is not referenced by the call or store below; none of the expected
; sequences above reads from a1 as an address.
254 %b = load <8 x i16>, ptr %y
255 %c = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
256 store <8 x i16> %c, ptr %x
; Declaration of the intrinsic called above.
259 declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
; Test case: llvm.cttz on a fixed-length <4 x i32> vector loaded from %x, with
; the i1 flag operand set to false; the result is stored back to %x.
; Expected lowerings recorded below (autogenerated, do not hand-edit):
;  - LMULMAX2-RV32I and LMULMAX2-RV64I prefixes: integer-only sequence on
;    ~x & (x - 1) with lui/addi-built 32-bit masks, a multiply and a final
;    right shift by 24.
;  - LMULMAX2-RV32F and LMULMAX2-RV64F prefixes: same-width vfcvt.f.xu.v on
;    x & -x, bracketed by fsrmi a1, 1 / fsrm a1 (rounding mode is changed for
;    the conversion and then restored from a1), shift by 23, subtract 127.
;  - LMULMAX2-RV32D, LMULMAX2-RV64D and LMULMAX8 prefixes: widening
;    vfwcvt.f.xu.v on x & -x, narrowing shift by 52, subtract 1023.
;  - All floating-point variants merge the constant 32 for lanes equal to zero.
;  - ZVBB prefix: a single vctz.v instruction.
261 define void @cttz_v4i32(ptr %x, ptr %y) nounwind {
262 ; LMULMAX2-RV32I-LABEL: cttz_v4i32:
263 ; LMULMAX2-RV32I: # %bb.0:
264 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
265 ; LMULMAX2-RV32I-NEXT: vle32.v v8, (a0)
266 ; LMULMAX2-RV32I-NEXT: li a1, 1
267 ; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1
268 ; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
269 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
270 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1
271 ; LMULMAX2-RV32I-NEXT: lui a1, 349525
272 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
273 ; LMULMAX2-RV32I-NEXT: vand.vx v9, v9, a1
274 ; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9
275 ; LMULMAX2-RV32I-NEXT: lui a1, 209715
276 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
277 ; LMULMAX2-RV32I-NEXT: vand.vx v9, v8, a1
278 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
279 ; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
280 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8
281 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4
282 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9
283 ; LMULMAX2-RV32I-NEXT: lui a1, 61681
284 ; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
285 ; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
286 ; LMULMAX2-RV32I-NEXT: lui a1, 4112
287 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
288 ; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1
289 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 24
290 ; LMULMAX2-RV32I-NEXT: vse32.v v8, (a0)
291 ; LMULMAX2-RV32I-NEXT: ret
293 ; LMULMAX2-RV64I-LABEL: cttz_v4i32:
294 ; LMULMAX2-RV64I: # %bb.0:
295 ; LMULMAX2-RV64I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
296 ; LMULMAX2-RV64I-NEXT: vle32.v v8, (a0)
297 ; LMULMAX2-RV64I-NEXT: li a1, 1
298 ; LMULMAX2-RV64I-NEXT: vsub.vx v9, v8, a1
299 ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
300 ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9
301 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1
302 ; LMULMAX2-RV64I-NEXT: lui a1, 349525
303 ; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
304 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1
305 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9
306 ; LMULMAX2-RV64I-NEXT: lui a1, 209715
307 ; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
308 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1
309 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
310 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
311 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8
312 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4
313 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9
314 ; LMULMAX2-RV64I-NEXT: lui a1, 61681
315 ; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
316 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
317 ; LMULMAX2-RV64I-NEXT: lui a1, 4112
318 ; LMULMAX2-RV64I-NEXT: addi a1, a1, 257
319 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
320 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24
321 ; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
322 ; LMULMAX2-RV64I-NEXT: ret
324 ; LMULMAX2-RV32F-LABEL: cttz_v4i32:
325 ; LMULMAX2-RV32F: # %bb.0:
326 ; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
327 ; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0)
328 ; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0
329 ; LMULMAX2-RV32F-NEXT: vand.vv v9, v8, v9
330 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
331 ; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v9, v9
332 ; LMULMAX2-RV32F-NEXT: fsrm a1
333 ; LMULMAX2-RV32F-NEXT: vsrl.vi v9, v9, 23
334 ; LMULMAX2-RV32F-NEXT: li a1, 127
335 ; LMULMAX2-RV32F-NEXT: vsub.vx v9, v9, a1
336 ; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0
337 ; LMULMAX2-RV32F-NEXT: li a1, 32
338 ; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0
339 ; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0)
340 ; LMULMAX2-RV32F-NEXT: ret
342 ; LMULMAX2-RV64F-LABEL: cttz_v4i32:
343 ; LMULMAX2-RV64F: # %bb.0:
344 ; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
345 ; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0)
346 ; LMULMAX2-RV64F-NEXT: vrsub.vi v9, v8, 0
347 ; LMULMAX2-RV64F-NEXT: vand.vv v9, v8, v9
348 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
349 ; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v9, v9
350 ; LMULMAX2-RV64F-NEXT: fsrm a1
351 ; LMULMAX2-RV64F-NEXT: vsrl.vi v9, v9, 23
352 ; LMULMAX2-RV64F-NEXT: li a1, 127
353 ; LMULMAX2-RV64F-NEXT: vsub.vx v9, v9, a1
354 ; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
355 ; LMULMAX2-RV64F-NEXT: li a1, 32
356 ; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v9, a1, v0
357 ; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0)
358 ; LMULMAX2-RV64F-NEXT: ret
360 ; LMULMAX2-RV32D-LABEL: cttz_v4i32:
361 ; LMULMAX2-RV32D: # %bb.0:
362 ; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e32, m1, ta, ma
363 ; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0)
364 ; LMULMAX2-RV32D-NEXT: vrsub.vi v9, v8, 0
365 ; LMULMAX2-RV32D-NEXT: vand.vv v9, v8, v9
366 ; LMULMAX2-RV32D-NEXT: vfwcvt.f.xu.v v10, v9
367 ; LMULMAX2-RV32D-NEXT: li a1, 52
368 ; LMULMAX2-RV32D-NEXT: vnsrl.wx v9, v10, a1
369 ; LMULMAX2-RV32D-NEXT: li a1, 1023
370 ; LMULMAX2-RV32D-NEXT: vsub.vx v9, v9, a1
371 ; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0
372 ; LMULMAX2-RV32D-NEXT: li a1, 32
373 ; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v9, a1, v0
374 ; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0)
375 ; LMULMAX2-RV32D-NEXT: ret
377 ; LMULMAX2-RV64D-LABEL: cttz_v4i32:
378 ; LMULMAX2-RV64D: # %bb.0:
379 ; LMULMAX2-RV64D-NEXT: vsetivli zero, 4, e32, m1, ta, ma
380 ; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0)
381 ; LMULMAX2-RV64D-NEXT: vrsub.vi v9, v8, 0
382 ; LMULMAX2-RV64D-NEXT: vand.vv v9, v8, v9
383 ; LMULMAX2-RV64D-NEXT: vfwcvt.f.xu.v v10, v9
384 ; LMULMAX2-RV64D-NEXT: li a1, 52
385 ; LMULMAX2-RV64D-NEXT: vnsrl.wx v9, v10, a1
386 ; LMULMAX2-RV64D-NEXT: li a1, 1023
387 ; LMULMAX2-RV64D-NEXT: vsub.vx v9, v9, a1
388 ; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0
389 ; LMULMAX2-RV64D-NEXT: li a1, 32
390 ; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v9, a1, v0
391 ; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0)
392 ; LMULMAX2-RV64D-NEXT: ret
394 ; LMULMAX8-LABEL: cttz_v4i32:
396 ; LMULMAX8-NEXT: vsetivli zero, 4, e32, m1, ta, ma
397 ; LMULMAX8-NEXT: vle32.v v8, (a0)
398 ; LMULMAX8-NEXT: vrsub.vi v9, v8, 0
399 ; LMULMAX8-NEXT: vand.vv v9, v8, v9
400 ; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v9
401 ; LMULMAX8-NEXT: li a1, 52
402 ; LMULMAX8-NEXT: vnsrl.wx v9, v10, a1
403 ; LMULMAX8-NEXT: li a1, 1023
404 ; LMULMAX8-NEXT: vsub.vx v9, v9, a1
405 ; LMULMAX8-NEXT: vmseq.vi v0, v8, 0
406 ; LMULMAX8-NEXT: li a1, 32
407 ; LMULMAX8-NEXT: vmerge.vxm v8, v9, a1, v0
408 ; LMULMAX8-NEXT: vse32.v v8, (a0)
411 ; ZVBB-LABEL: cttz_v4i32:
413 ; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
414 ; ZVBB-NEXT: vle32.v v8, (a0)
415 ; ZVBB-NEXT: vctz.v v8, v8
416 ; ZVBB-NEXT: vse32.v v8, (a0)
418 %a = load <4 x i32>, ptr %x
; %b is not referenced by the call or store below; none of the expected
; sequences above reads from a1 as an address.
419 %b = load <4 x i32>, ptr %y
420 %c = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
421 store <4 x i32> %c, ptr %x
; Declaration of the intrinsic called above.
424 declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
; Test case: llvm.cttz on a fixed-length <2 x i64> vector loaded from %x, with
; the i1 flag operand set to false; the result is stored back to %x.
; Expected lowerings recorded below (autogenerated, do not hand-edit):
;  - LMULMAX2-RV32I prefix: integer-only sequence where each mask constant is
;    splatted with vmv.v.x under an e32 vsetvli and then applied with .vv
;    operations at e64 (presumably because a 64-bit constant does not fit in
;    one 32-bit scalar register; confirm against the lowering code).
;  - LMULMAX2-RV64I prefix: same algorithm, but each 64-bit mask is built in a
;    scalar register with lui/addiw/slli/add and applied with .vx operations.
;  - LMULMAX2-RV32F and LMULMAX2-RV64F prefixes: narrowing vfncvt.f.xu.w of
;    x & -x to e32 (rounding mode changed via fsrmi and restored via fsrm),
;    shift by 23, then bias 127 removed either after vzext.vf2 (RV32F) or with
;    a widening vwsubu.vx (RV64F).
;  - LMULMAX2-RV32D, LMULMAX2-RV64D and LMULMAX8 prefixes: same-width
;    vfcvt.f.xu.v at e64, shift by 52, subtract 1023.
;  - All floating-point variants merge the constant 64 for lanes equal to zero.
;  - ZVBB prefix: a single vctz.v instruction.
426 define void @cttz_v2i64(ptr %x, ptr %y) nounwind {
427 ; LMULMAX2-RV32I-LABEL: cttz_v2i64:
428 ; LMULMAX2-RV32I: # %bb.0:
429 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
430 ; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
431 ; LMULMAX2-RV32I-NEXT: li a1, 1
432 ; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1
433 ; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
434 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
435 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1
436 ; LMULMAX2-RV32I-NEXT: lui a1, 349525
437 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
438 ; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
439 ; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
440 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
441 ; LMULMAX2-RV32I-NEXT: vand.vv v9, v9, v10
442 ; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9
443 ; LMULMAX2-RV32I-NEXT: lui a1, 209715
444 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
445 ; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
446 ; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
447 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
448 ; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v9
449 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
450 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
451 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
452 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4
453 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9
454 ; LMULMAX2-RV32I-NEXT: lui a1, 61681
455 ; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
456 ; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
457 ; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
458 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
459 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
460 ; LMULMAX2-RV32I-NEXT: lui a1, 4112
461 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
462 ; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
463 ; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
464 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
465 ; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v9
466 ; LMULMAX2-RV32I-NEXT: li a1, 56
467 ; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
468 ; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
469 ; LMULMAX2-RV32I-NEXT: ret
471 ; LMULMAX2-RV64I-LABEL: cttz_v2i64:
472 ; LMULMAX2-RV64I: # %bb.0:
473 ; LMULMAX2-RV64I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
474 ; LMULMAX2-RV64I-NEXT: vle64.v v8, (a0)
475 ; LMULMAX2-RV64I-NEXT: li a1, 1
476 ; LMULMAX2-RV64I-NEXT: vsub.vx v9, v8, a1
477 ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
478 ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9
479 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1
480 ; LMULMAX2-RV64I-NEXT: lui a1, 349525
481 ; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
482 ; LMULMAX2-RV64I-NEXT: slli a2, a1, 32
483 ; LMULMAX2-RV64I-NEXT: add a1, a1, a2
484 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1
485 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9
486 ; LMULMAX2-RV64I-NEXT: lui a1, 209715
487 ; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
488 ; LMULMAX2-RV64I-NEXT: slli a2, a1, 32
489 ; LMULMAX2-RV64I-NEXT: add a1, a1, a2
490 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1
491 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
492 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
493 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8
494 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4
495 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9
496 ; LMULMAX2-RV64I-NEXT: lui a1, 61681
497 ; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
498 ; LMULMAX2-RV64I-NEXT: slli a2, a1, 32
499 ; LMULMAX2-RV64I-NEXT: add a1, a1, a2
500 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
501 ; LMULMAX2-RV64I-NEXT: lui a1, 4112
502 ; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257
503 ; LMULMAX2-RV64I-NEXT: slli a2, a1, 32
504 ; LMULMAX2-RV64I-NEXT: add a1, a1, a2
505 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
506 ; LMULMAX2-RV64I-NEXT: li a1, 56
507 ; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1
508 ; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0)
509 ; LMULMAX2-RV64I-NEXT: ret
511 ; LMULMAX2-RV32F-LABEL: cttz_v2i64:
512 ; LMULMAX2-RV32F: # %bb.0:
513 ; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma
514 ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0)
515 ; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0
516 ; LMULMAX2-RV32F-NEXT: vand.vv v9, v8, v9
517 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
518 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
519 ; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v9
520 ; LMULMAX2-RV32F-NEXT: fsrm a1
521 ; LMULMAX2-RV32F-NEXT: vsrl.vi v9, v10, 23
522 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
523 ; LMULMAX2-RV32F-NEXT: vzext.vf2 v10, v9
524 ; LMULMAX2-RV32F-NEXT: li a1, 127
525 ; LMULMAX2-RV32F-NEXT: vsub.vx v9, v10, a1
526 ; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0
527 ; LMULMAX2-RV32F-NEXT: li a1, 64
528 ; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0
529 ; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0)
530 ; LMULMAX2-RV32F-NEXT: ret
532 ; LMULMAX2-RV64F-LABEL: cttz_v2i64:
533 ; LMULMAX2-RV64F: # %bb.0:
534 ; LMULMAX2-RV64F-NEXT: vsetivli zero, 2, e64, m1, ta, ma
535 ; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0)
536 ; LMULMAX2-RV64F-NEXT: vrsub.vi v9, v8, 0
537 ; LMULMAX2-RV64F-NEXT: vand.vv v9, v8, v9
538 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
539 ; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
540 ; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v9
541 ; LMULMAX2-RV64F-NEXT: fsrm a1
542 ; LMULMAX2-RV64F-NEXT: vsrl.vi v9, v10, 23
543 ; LMULMAX2-RV64F-NEXT: li a1, 127
544 ; LMULMAX2-RV64F-NEXT: vwsubu.vx v10, v9, a1
545 ; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
546 ; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
547 ; LMULMAX2-RV64F-NEXT: li a1, 64
548 ; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v10, a1, v0
549 ; LMULMAX2-RV64F-NEXT: vse64.v v8, (a0)
550 ; LMULMAX2-RV64F-NEXT: ret
552 ; LMULMAX2-RV32D-LABEL: cttz_v2i64:
553 ; LMULMAX2-RV32D: # %bb.0:
554 ; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e64, m1, ta, ma
555 ; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0)
556 ; LMULMAX2-RV32D-NEXT: vrsub.vi v9, v8, 0
557 ; LMULMAX2-RV32D-NEXT: vand.vv v9, v8, v9
558 ; LMULMAX2-RV32D-NEXT: fsrmi a1, 1
559 ; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v9, v9
560 ; LMULMAX2-RV32D-NEXT: fsrm a1
561 ; LMULMAX2-RV32D-NEXT: li a1, 52
562 ; LMULMAX2-RV32D-NEXT: vsrl.vx v9, v9, a1
563 ; LMULMAX2-RV32D-NEXT: li a1, 1023
564 ; LMULMAX2-RV32D-NEXT: vsub.vx v9, v9, a1
565 ; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0
566 ; LMULMAX2-RV32D-NEXT: li a1, 64
567 ; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v9, a1, v0
568 ; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0)
569 ; LMULMAX2-RV32D-NEXT: ret
571 ; LMULMAX2-RV64D-LABEL: cttz_v2i64:
572 ; LMULMAX2-RV64D: # %bb.0:
573 ; LMULMAX2-RV64D-NEXT: vsetivli zero, 2, e64, m1, ta, ma
574 ; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0)
575 ; LMULMAX2-RV64D-NEXT: vrsub.vi v9, v8, 0
576 ; LMULMAX2-RV64D-NEXT: vand.vv v9, v8, v9
577 ; LMULMAX2-RV64D-NEXT: fsrmi a1, 1
578 ; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v9, v9
579 ; LMULMAX2-RV64D-NEXT: fsrm a1
580 ; LMULMAX2-RV64D-NEXT: li a1, 52
581 ; LMULMAX2-RV64D-NEXT: vsrl.vx v9, v9, a1
582 ; LMULMAX2-RV64D-NEXT: li a1, 1023
583 ; LMULMAX2-RV64D-NEXT: vsub.vx v9, v9, a1
584 ; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0
585 ; LMULMAX2-RV64D-NEXT: li a1, 64
586 ; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v9, a1, v0
587 ; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0)
588 ; LMULMAX2-RV64D-NEXT: ret
590 ; LMULMAX8-LABEL: cttz_v2i64:
592 ; LMULMAX8-NEXT: vsetivli zero, 2, e64, m1, ta, ma
593 ; LMULMAX8-NEXT: vle64.v v8, (a0)
594 ; LMULMAX8-NEXT: vrsub.vi v9, v8, 0
595 ; LMULMAX8-NEXT: vand.vv v9, v8, v9
596 ; LMULMAX8-NEXT: fsrmi a1, 1
597 ; LMULMAX8-NEXT: vfcvt.f.xu.v v9, v9
598 ; LMULMAX8-NEXT: fsrm a1
599 ; LMULMAX8-NEXT: li a1, 52
600 ; LMULMAX8-NEXT: vsrl.vx v9, v9, a1
601 ; LMULMAX8-NEXT: li a1, 1023
602 ; LMULMAX8-NEXT: vsub.vx v9, v9, a1
603 ; LMULMAX8-NEXT: vmseq.vi v0, v8, 0
604 ; LMULMAX8-NEXT: li a1, 64
605 ; LMULMAX8-NEXT: vmerge.vxm v8, v9, a1, v0
606 ; LMULMAX8-NEXT: vse64.v v8, (a0)
609 ; ZVBB-LABEL: cttz_v2i64:
611 ; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
612 ; ZVBB-NEXT: vle64.v v8, (a0)
613 ; ZVBB-NEXT: vctz.v v8, v8
614 ; ZVBB-NEXT: vse64.v v8, (a0)
616 %a = load <2 x i64>, ptr %x
; %b is not referenced by the call or store below; none of the expected
; sequences above reads from a1 as an address.
617 %b = load <2 x i64>, ptr %y
618 %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
619 store <2 x i64> %c, ptr %x
; Declaration of the intrinsic called above.
622 declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
; Test case: llvm.cttz on a fixed-length <32 x i8> vector loaded from %x, with
; the i1 flag operand set to false; the result is stored back to %x.
; Expected lowerings recorded below (autogenerated, do not hand-edit):
;  - LMULMAX2 prefix: one m2 operation over all 32 bytes using the integer
;    ~x & (x - 1) sequence with masks 85, 51 and 15.
;  - LMULMAX1 prefix: the vector is processed as two 16-byte m1 halves (at a0
;    and a0 + 16); the scalar constants are materialised once in a2/a3/a4 and
;    reused for the second half.
;  - LMULMAX8 prefix: x & -x is zero-extended to e16, converted to floating
;    point, narrowed with a shift by 23, biased by 127, and the constant 8 is
;    merged in for lanes equal to zero.
;  - ZVBB prefix: a single vctz.v instruction.
624 define void @cttz_v32i8(ptr %x, ptr %y) nounwind {
625 ; LMULMAX2-LABEL: cttz_v32i8:
627 ; LMULMAX2-NEXT: li a1, 32
628 ; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
629 ; LMULMAX2-NEXT: vle8.v v8, (a0)
630 ; LMULMAX2-NEXT: li a1, 1
631 ; LMULMAX2-NEXT: vsub.vx v10, v8, a1
632 ; LMULMAX2-NEXT: vnot.v v8, v8
633 ; LMULMAX2-NEXT: vand.vv v8, v8, v10
634 ; LMULMAX2-NEXT: vsrl.vi v10, v8, 1
635 ; LMULMAX2-NEXT: li a1, 85
636 ; LMULMAX2-NEXT: vand.vx v10, v10, a1
637 ; LMULMAX2-NEXT: vsub.vv v8, v8, v10
638 ; LMULMAX2-NEXT: li a1, 51
639 ; LMULMAX2-NEXT: vand.vx v10, v8, a1
640 ; LMULMAX2-NEXT: vsrl.vi v8, v8, 2
641 ; LMULMAX2-NEXT: vand.vx v8, v8, a1
642 ; LMULMAX2-NEXT: vadd.vv v8, v10, v8
643 ; LMULMAX2-NEXT: vsrl.vi v10, v8, 4
644 ; LMULMAX2-NEXT: vadd.vv v8, v8, v10
645 ; LMULMAX2-NEXT: vand.vi v8, v8, 15
646 ; LMULMAX2-NEXT: vse8.v v8, (a0)
649 ; LMULMAX1-LABEL: cttz_v32i8:
651 ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
652 ; LMULMAX1-NEXT: addi a1, a0, 16
653 ; LMULMAX1-NEXT: vle8.v v8, (a1)
654 ; LMULMAX1-NEXT: vle8.v v9, (a0)
655 ; LMULMAX1-NEXT: li a2, 1
656 ; LMULMAX1-NEXT: vsub.vx v10, v8, a2
657 ; LMULMAX1-NEXT: vnot.v v8, v8
658 ; LMULMAX1-NEXT: vand.vv v8, v8, v10
659 ; LMULMAX1-NEXT: vsrl.vi v10, v8, 1
660 ; LMULMAX1-NEXT: li a3, 85
661 ; LMULMAX1-NEXT: vand.vx v10, v10, a3
662 ; LMULMAX1-NEXT: vsub.vv v8, v8, v10
663 ; LMULMAX1-NEXT: li a4, 51
664 ; LMULMAX1-NEXT: vand.vx v10, v8, a4
665 ; LMULMAX1-NEXT: vsrl.vi v8, v8, 2
666 ; LMULMAX1-NEXT: vand.vx v8, v8, a4
667 ; LMULMAX1-NEXT: vadd.vv v8, v10, v8
668 ; LMULMAX1-NEXT: vsrl.vi v10, v8, 4
669 ; LMULMAX1-NEXT: vadd.vv v8, v8, v10
670 ; LMULMAX1-NEXT: vand.vi v8, v8, 15
671 ; LMULMAX1-NEXT: vsub.vx v10, v9, a2
672 ; LMULMAX1-NEXT: vnot.v v9, v9
673 ; LMULMAX1-NEXT: vand.vv v9, v9, v10
674 ; LMULMAX1-NEXT: vsrl.vi v10, v9, 1
675 ; LMULMAX1-NEXT: vand.vx v10, v10, a3
676 ; LMULMAX1-NEXT: vsub.vv v9, v9, v10
677 ; LMULMAX1-NEXT: vand.vx v10, v9, a4
678 ; LMULMAX1-NEXT: vsrl.vi v9, v9, 2
679 ; LMULMAX1-NEXT: vand.vx v9, v9, a4
680 ; LMULMAX1-NEXT: vadd.vv v9, v10, v9
681 ; LMULMAX1-NEXT: vsrl.vi v10, v9, 4
682 ; LMULMAX1-NEXT: vadd.vv v9, v9, v10
683 ; LMULMAX1-NEXT: vand.vi v9, v9, 15
684 ; LMULMAX1-NEXT: vse8.v v9, (a0)
685 ; LMULMAX1-NEXT: vse8.v v8, (a1)
688 ; LMULMAX8-LABEL: cttz_v32i8:
690 ; LMULMAX8-NEXT: li a1, 32
691 ; LMULMAX8-NEXT: vsetvli zero, a1, e8, m2, ta, ma
692 ; LMULMAX8-NEXT: vle8.v v8, (a0)
693 ; LMULMAX8-NEXT: vrsub.vi v10, v8, 0
694 ; LMULMAX8-NEXT: vand.vv v10, v8, v10
695 ; LMULMAX8-NEXT: vsetvli zero, zero, e16, m4, ta, ma
696 ; LMULMAX8-NEXT: vzext.vf2 v12, v10
697 ; LMULMAX8-NEXT: vfwcvt.f.xu.v v16, v12
698 ; LMULMAX8-NEXT: vnsrl.wi v12, v16, 23
699 ; LMULMAX8-NEXT: vsetvli zero, zero, e8, m2, ta, ma
700 ; LMULMAX8-NEXT: vnsrl.wi v10, v12, 0
701 ; LMULMAX8-NEXT: li a1, 127
702 ; LMULMAX8-NEXT: vmseq.vi v0, v8, 0
703 ; LMULMAX8-NEXT: vsub.vx v8, v10, a1
704 ; LMULMAX8-NEXT: vmerge.vim v8, v8, 8, v0
705 ; LMULMAX8-NEXT: vse8.v v8, (a0)
708 ; ZVBB-LABEL: cttz_v32i8:
710 ; ZVBB-NEXT: li a1, 32
711 ; ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma
712 ; ZVBB-NEXT: vle8.v v8, (a0)
713 ; ZVBB-NEXT: vctz.v v8, v8
714 ; ZVBB-NEXT: vse8.v v8, (a0)
716 %a = load <32 x i8>, ptr %x
; %b is not referenced by the call or store below; in the expected sequences
; above a1 is only ever overwritten (a constant or a0 + 16), never read as the
; incoming %y pointer.
717 %b = load <32 x i8>, ptr %y
718 %c = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
719 store <32 x i8> %c, ptr %x
; Declaration of the intrinsic called above.
722 declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)
724 define void @cttz_v16i16(ptr %x, ptr %y) nounwind {
725 ; LMULMAX2-LABEL: cttz_v16i16:
727 ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
728 ; LMULMAX2-NEXT: vle16.v v8, (a0)
729 ; LMULMAX2-NEXT: li a1, 1
730 ; LMULMAX2-NEXT: vsub.vx v10, v8, a1
731 ; LMULMAX2-NEXT: vnot.v v8, v8
732 ; LMULMAX2-NEXT: vand.vv v8, v8, v10
733 ; LMULMAX2-NEXT: vsrl.vi v10, v8, 1
734 ; LMULMAX2-NEXT: lui a1, 5
735 ; LMULMAX2-NEXT: addi a1, a1, 1365
736 ; LMULMAX2-NEXT: vand.vx v10, v10, a1
737 ; LMULMAX2-NEXT: vsub.vv v8, v8, v10
738 ; LMULMAX2-NEXT: lui a1, 3
739 ; LMULMAX2-NEXT: addi a1, a1, 819
740 ; LMULMAX2-NEXT: vand.vx v10, v8, a1
741 ; LMULMAX2-NEXT: vsrl.vi v8, v8, 2
742 ; LMULMAX2-NEXT: vand.vx v8, v8, a1
743 ; LMULMAX2-NEXT: vadd.vv v8, v10, v8
744 ; LMULMAX2-NEXT: vsrl.vi v10, v8, 4
745 ; LMULMAX2-NEXT: vadd.vv v8, v8, v10
746 ; LMULMAX2-NEXT: lui a1, 1
747 ; LMULMAX2-NEXT: addi a1, a1, -241
748 ; LMULMAX2-NEXT: vand.vx v8, v8, a1
749 ; LMULMAX2-NEXT: li a1, 257
750 ; LMULMAX2-NEXT: vmul.vx v8, v8, a1
751 ; LMULMAX2-NEXT: vsrl.vi v8, v8, 8
752 ; LMULMAX2-NEXT: vse16.v v8, (a0)
755 ; LMULMAX1-LABEL: cttz_v16i16:
757 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
758 ; LMULMAX1-NEXT: addi a1, a0, 16
759 ; LMULMAX1-NEXT: vle16.v v8, (a1)
760 ; LMULMAX1-NEXT: vle16.v v9, (a0)
761 ; LMULMAX1-NEXT: li a2, 1
762 ; LMULMAX1-NEXT: vsub.vx v10, v8, a2
763 ; LMULMAX1-NEXT: vnot.v v8, v8
764 ; LMULMAX1-NEXT: vand.vv v8, v8, v10
765 ; LMULMAX1-NEXT: vsrl.vi v10, v8, 1
766 ; LMULMAX1-NEXT: lui a3, 5
767 ; LMULMAX1-NEXT: addi a3, a3, 1365
768 ; LMULMAX1-NEXT: vand.vx v10, v10, a3
769 ; LMULMAX1-NEXT: vsub.vv v8, v8, v10
770 ; LMULMAX1-NEXT: lui a4, 3
771 ; LMULMAX1-NEXT: addi a4, a4, 819
772 ; LMULMAX1-NEXT: vand.vx v10, v8, a4
773 ; LMULMAX1-NEXT: vsrl.vi v8, v8, 2
774 ; LMULMAX1-NEXT: vand.vx v8, v8, a4
775 ; LMULMAX1-NEXT: vadd.vv v8, v10, v8
776 ; LMULMAX1-NEXT: vsrl.vi v10, v8, 4
777 ; LMULMAX1-NEXT: vadd.vv v8, v8, v10
778 ; LMULMAX1-NEXT: lui a5, 1
779 ; LMULMAX1-NEXT: addi a5, a5, -241
780 ; LMULMAX1-NEXT: vand.vx v8, v8, a5
781 ; LMULMAX1-NEXT: li a6, 257
782 ; LMULMAX1-NEXT: vmul.vx v8, v8, a6
783 ; LMULMAX1-NEXT: vsrl.vi v8, v8, 8
784 ; LMULMAX1-NEXT: vsub.vx v10, v9, a2
785 ; LMULMAX1-NEXT: vnot.v v9, v9
786 ; LMULMAX1-NEXT: vand.vv v9, v9, v10
787 ; LMULMAX1-NEXT: vsrl.vi v10, v9, 1
788 ; LMULMAX1-NEXT: vand.vx v10, v10, a3
789 ; LMULMAX1-NEXT: vsub.vv v9, v9, v10
790 ; LMULMAX1-NEXT: vand.vx v10, v9, a4
791 ; LMULMAX1-NEXT: vsrl.vi v9, v9, 2
792 ; LMULMAX1-NEXT: vand.vx v9, v9, a4
793 ; LMULMAX1-NEXT: vadd.vv v9, v10, v9
794 ; LMULMAX1-NEXT: vsrl.vi v10, v9, 4
795 ; LMULMAX1-NEXT: vadd.vv v9, v9, v10
796 ; LMULMAX1-NEXT: vand.vx v9, v9, a5
797 ; LMULMAX1-NEXT: vmul.vx v9, v9, a6
798 ; LMULMAX1-NEXT: vsrl.vi v9, v9, 8
799 ; LMULMAX1-NEXT: vse16.v v9, (a0)
800 ; LMULMAX1-NEXT: vse16.v v8, (a1)
803 ; LMULMAX8-LABEL: cttz_v16i16:
805 ; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma
806 ; LMULMAX8-NEXT: vle16.v v8, (a0)
807 ; LMULMAX8-NEXT: vrsub.vi v10, v8, 0
808 ; LMULMAX8-NEXT: vand.vv v10, v8, v10
809 ; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v10
810 ; LMULMAX8-NEXT: vnsrl.wi v10, v12, 23
811 ; LMULMAX8-NEXT: li a1, 127
812 ; LMULMAX8-NEXT: vsub.vx v10, v10, a1
813 ; LMULMAX8-NEXT: vmseq.vi v0, v8, 0
814 ; LMULMAX8-NEXT: li a1, 16
815 ; LMULMAX8-NEXT: vmerge.vxm v8, v10, a1, v0
816 ; LMULMAX8-NEXT: vse16.v v8, (a0)
819 ; ZVBB-LABEL: cttz_v16i16:
821 ; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
822 ; ZVBB-NEXT: vle16.v v8, (a0)
823 ; ZVBB-NEXT: vctz.v v8, v8
824 ; ZVBB-NEXT: vse16.v v8, (a0)
826 %a = load <16 x i16>, ptr %x
827 %b = load <16 x i16>, ptr %y
828 %c = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
829 store <16 x i16> %c, ptr %x
; Declaration of the count-trailing-zeros intrinsic on <16 x i16>; it is the
; callee of the call in @cttz_v16i16 (a <16 x i16> value plus an i1 flag).
832 declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)
; cttz_v8i32: loads an <8 x i32> from %x, calls @llvm.cttz.v8i32 on it with the
; i1 flag operand set to false, and stores the result back through %x.
;
; What the expectations below show per configuration:
;  - RV32I / RV64I variants: compute (~x & (x - 1)) and then run a
;    mask / shift / add bit-counting sequence, finishing with a multiply and a
;    right shift by 24.
;  - RV32F / RV64F / RV32D / RV64D variants: isolate the lowest set bit with
;    (x & -x), convert it with vfcvt.f.xu.v (bracketed by fsrmi / fsrm), shift
;    right by 23 and subtract 127. Lanes equal to zero are selected with
;    vmseq.vi and replaced by 32 through vmerge.vxm.
;  - The lmul-max=8 variant: same idea, but with a widening convert, a narrowing
;    shift by 52 and a subtract of 1023.
;  - The Zvbb variant: a single vctz.v.
;
; NOTE(review): %b (the load from %y) has no use in the visible body; confirm
; that this is intentional before removing it.
; The assertion lines are tool-generated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
834 define void @cttz_v8i32(ptr %x, ptr %y) nounwind {
835 ; LMULMAX2-RV32I-LABEL: cttz_v8i32:
836 ; LMULMAX2-RV32I: # %bb.0:
837 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
838 ; LMULMAX2-RV32I-NEXT: vle32.v v8, (a0)
839 ; LMULMAX2-RV32I-NEXT: li a1, 1
840 ; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1
841 ; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
842 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
843 ; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
844 ; LMULMAX2-RV32I-NEXT: lui a1, 349525
845 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
846 ; LMULMAX2-RV32I-NEXT: vand.vx v10, v10, a1
847 ; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10
848 ; LMULMAX2-RV32I-NEXT: lui a1, 209715
849 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
850 ; LMULMAX2-RV32I-NEXT: vand.vx v10, v8, a1
851 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
852 ; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
853 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
854 ; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4
855 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10
856 ; LMULMAX2-RV32I-NEXT: lui a1, 61681
857 ; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
858 ; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
859 ; LMULMAX2-RV32I-NEXT: lui a1, 4112
860 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
861 ; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1
862 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 24
863 ; LMULMAX2-RV32I-NEXT: vse32.v v8, (a0)
864 ; LMULMAX2-RV32I-NEXT: ret
866 ; LMULMAX2-RV64I-LABEL: cttz_v8i32:
867 ; LMULMAX2-RV64I: # %bb.0:
868 ; LMULMAX2-RV64I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
869 ; LMULMAX2-RV64I-NEXT: vle32.v v8, (a0)
870 ; LMULMAX2-RV64I-NEXT: li a1, 1
871 ; LMULMAX2-RV64I-NEXT: vsub.vx v10, v8, a1
872 ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
873 ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v10
874 ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1
875 ; LMULMAX2-RV64I-NEXT: lui a1, 349525
876 ; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
877 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1
878 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10
879 ; LMULMAX2-RV64I-NEXT: lui a1, 209715
880 ; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
881 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1
882 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
883 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
884 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8
885 ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4
886 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10
887 ; LMULMAX2-RV64I-NEXT: lui a1, 61681
888 ; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
889 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
890 ; LMULMAX2-RV64I-NEXT: lui a1, 4112
891 ; LMULMAX2-RV64I-NEXT: addi a1, a1, 257
892 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
893 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24
894 ; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
895 ; LMULMAX2-RV64I-NEXT: ret
897 ; LMULMAX2-RV32F-LABEL: cttz_v8i32:
898 ; LMULMAX2-RV32F: # %bb.0:
899 ; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
900 ; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0)
901 ; LMULMAX2-RV32F-NEXT: vrsub.vi v10, v8, 0
902 ; LMULMAX2-RV32F-NEXT: vand.vv v10, v8, v10
903 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
904 ; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v10, v10
905 ; LMULMAX2-RV32F-NEXT: fsrm a1
906 ; LMULMAX2-RV32F-NEXT: vsrl.vi v10, v10, 23
907 ; LMULMAX2-RV32F-NEXT: li a1, 127
908 ; LMULMAX2-RV32F-NEXT: vsub.vx v10, v10, a1
909 ; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0
910 ; LMULMAX2-RV32F-NEXT: li a1, 32
911 ; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v10, a1, v0
912 ; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0)
913 ; LMULMAX2-RV32F-NEXT: ret
915 ; LMULMAX2-RV64F-LABEL: cttz_v8i32:
916 ; LMULMAX2-RV64F: # %bb.0:
917 ; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
918 ; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0)
919 ; LMULMAX2-RV64F-NEXT: vrsub.vi v10, v8, 0
920 ; LMULMAX2-RV64F-NEXT: vand.vv v10, v8, v10
921 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
922 ; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v10, v10
923 ; LMULMAX2-RV64F-NEXT: fsrm a1
924 ; LMULMAX2-RV64F-NEXT: vsrl.vi v10, v10, 23
925 ; LMULMAX2-RV64F-NEXT: li a1, 127
926 ; LMULMAX2-RV64F-NEXT: vsub.vx v10, v10, a1
927 ; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
928 ; LMULMAX2-RV64F-NEXT: li a1, 32
929 ; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v10, a1, v0
930 ; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0)
931 ; LMULMAX2-RV64F-NEXT: ret
933 ; LMULMAX2-RV32D-LABEL: cttz_v8i32:
934 ; LMULMAX2-RV32D: # %bb.0:
935 ; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma
936 ; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0)
937 ; LMULMAX2-RV32D-NEXT: vrsub.vi v10, v8, 0
938 ; LMULMAX2-RV32D-NEXT: vand.vv v10, v8, v10
939 ; LMULMAX2-RV32D-NEXT: fsrmi a1, 1
940 ; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v10, v10
941 ; LMULMAX2-RV32D-NEXT: fsrm a1
942 ; LMULMAX2-RV32D-NEXT: vsrl.vi v10, v10, 23
943 ; LMULMAX2-RV32D-NEXT: li a1, 127
944 ; LMULMAX2-RV32D-NEXT: vsub.vx v10, v10, a1
945 ; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0
946 ; LMULMAX2-RV32D-NEXT: li a1, 32
947 ; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v10, a1, v0
948 ; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0)
949 ; LMULMAX2-RV32D-NEXT: ret
951 ; LMULMAX2-RV64D-LABEL: cttz_v8i32:
952 ; LMULMAX2-RV64D: # %bb.0:
953 ; LMULMAX2-RV64D-NEXT: vsetivli zero, 8, e32, m2, ta, ma
954 ; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0)
955 ; LMULMAX2-RV64D-NEXT: vrsub.vi v10, v8, 0
956 ; LMULMAX2-RV64D-NEXT: vand.vv v10, v8, v10
957 ; LMULMAX2-RV64D-NEXT: fsrmi a1, 1
958 ; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v10, v10
959 ; LMULMAX2-RV64D-NEXT: fsrm a1
960 ; LMULMAX2-RV64D-NEXT: vsrl.vi v10, v10, 23
961 ; LMULMAX2-RV64D-NEXT: li a1, 127
962 ; LMULMAX2-RV64D-NEXT: vsub.vx v10, v10, a1
963 ; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0
964 ; LMULMAX2-RV64D-NEXT: li a1, 32
965 ; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v10, a1, v0
966 ; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0)
967 ; LMULMAX2-RV64D-NEXT: ret
969 ; LMULMAX8-LABEL: cttz_v8i32:
971 ; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma
972 ; LMULMAX8-NEXT: vle32.v v8, (a0)
973 ; LMULMAX8-NEXT: vrsub.vi v10, v8, 0
974 ; LMULMAX8-NEXT: vand.vv v10, v8, v10
975 ; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v10
976 ; LMULMAX8-NEXT: li a1, 52
977 ; LMULMAX8-NEXT: vnsrl.wx v10, v12, a1
978 ; LMULMAX8-NEXT: li a1, 1023
979 ; LMULMAX8-NEXT: vsub.vx v10, v10, a1
980 ; LMULMAX8-NEXT: vmseq.vi v0, v8, 0
981 ; LMULMAX8-NEXT: li a1, 32
982 ; LMULMAX8-NEXT: vmerge.vxm v8, v10, a1, v0
983 ; LMULMAX8-NEXT: vse32.v v8, (a0)
986 ; ZVBB-LABEL: cttz_v8i32:
988 ; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
989 ; ZVBB-NEXT: vle32.v v8, (a0)
990 ; ZVBB-NEXT: vctz.v v8, v8
991 ; ZVBB-NEXT: vse32.v v8, (a0)
993 %a = load <8 x i32>, ptr %x
994 %b = load <8 x i32>, ptr %y
995 %c = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
996 store <8 x i32> %c, ptr %x
; Declaration of the count-trailing-zeros intrinsic on <8 x i32>; it is the
; callee of the call in @cttz_v8i32 (an <8 x i32> value plus an i1 flag).
999 declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
; cttz_v4i64: loads a <4 x i64> from %x, calls @llvm.cttz.v4i64 on it with the
; i1 flag operand set to false, and stores the result back through %x.
;
; What the expectations below show per configuration:
;  - RV32I variant: the bit-counting masks are splatted with vmv.v.x under an
;    e32 vsetvli and then applied with vand.vv / vmul.vv at e64; the final
;    shift amount is 56.
;  - RV64I variant: each 64-bit mask is built in a scalar register with
;    lui / addiw / slli / add and applied with the .vx forms.
;  - RV32F / RV64F variants: (x & -x) is converted with the narrowing
;    vfncvt.f.xu.w at e32 (bracketed by fsrmi / fsrm), shifted right by 23, and
;    biased by 127; the two variants differ in how the result is widened back
;    to e64 (vzext.vf2 then vsub.vx, versus vwsubu.vx).
;  - RV32D / RV64D and lmul-max=8 variants: vfcvt.f.xu.v at e64, shift right
;    by 52, subtract 1023.
;  - In every float-based variant, lanes equal to zero are selected with
;    vmseq.vi and replaced by 64 through vmerge.vxm.
;  - The Zvbb variant: a single vctz.v.
;
; NOTE(review): %b (the load from %y) has no use in the visible body; confirm
; that this is intentional before removing it.
; The assertion lines are tool-generated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
1001 define void @cttz_v4i64(ptr %x, ptr %y) nounwind {
1002 ; LMULMAX2-RV32I-LABEL: cttz_v4i64:
1003 ; LMULMAX2-RV32I: # %bb.0:
1004 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1005 ; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
1006 ; LMULMAX2-RV32I-NEXT: li a1, 1
1007 ; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1
1008 ; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
1009 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
1010 ; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
1011 ; LMULMAX2-RV32I-NEXT: lui a1, 349525
1012 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
1013 ; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
1014 ; LMULMAX2-RV32I-NEXT: vmv.v.x v12, a1
1015 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1016 ; LMULMAX2-RV32I-NEXT: vand.vv v10, v10, v12
1017 ; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10
1018 ; LMULMAX2-RV32I-NEXT: lui a1, 209715
1019 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
1020 ; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
1021 ; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
1022 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1023 ; LMULMAX2-RV32I-NEXT: vand.vv v12, v8, v10
1024 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
1025 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
1026 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v12, v8
1027 ; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4
1028 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10
1029 ; LMULMAX2-RV32I-NEXT: lui a1, 61681
1030 ; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
1031 ; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
1032 ; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
1033 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1034 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
1035 ; LMULMAX2-RV32I-NEXT: lui a1, 4112
1036 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
1037 ; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
1038 ; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
1039 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1040 ; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10
1041 ; LMULMAX2-RV32I-NEXT: li a1, 56
1042 ; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
1043 ; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
1044 ; LMULMAX2-RV32I-NEXT: ret
1046 ; LMULMAX2-RV64I-LABEL: cttz_v4i64:
1047 ; LMULMAX2-RV64I: # %bb.0:
1048 ; LMULMAX2-RV64I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1049 ; LMULMAX2-RV64I-NEXT: vle64.v v8, (a0)
1050 ; LMULMAX2-RV64I-NEXT: li a1, 1
1051 ; LMULMAX2-RV64I-NEXT: vsub.vx v10, v8, a1
1052 ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
1053 ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v10
1054 ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1
1055 ; LMULMAX2-RV64I-NEXT: lui a1, 349525
1056 ; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
1057 ; LMULMAX2-RV64I-NEXT: slli a2, a1, 32
1058 ; LMULMAX2-RV64I-NEXT: add a1, a1, a2
1059 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1
1060 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10
1061 ; LMULMAX2-RV64I-NEXT: lui a1, 209715
1062 ; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
1063 ; LMULMAX2-RV64I-NEXT: slli a2, a1, 32
1064 ; LMULMAX2-RV64I-NEXT: add a1, a1, a2
1065 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1
1066 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
1067 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
1068 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8
1069 ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4
1070 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10
1071 ; LMULMAX2-RV64I-NEXT: lui a1, 61681
1072 ; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
1073 ; LMULMAX2-RV64I-NEXT: slli a2, a1, 32
1074 ; LMULMAX2-RV64I-NEXT: add a1, a1, a2
1075 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
1076 ; LMULMAX2-RV64I-NEXT: lui a1, 4112
1077 ; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257
1078 ; LMULMAX2-RV64I-NEXT: slli a2, a1, 32
1079 ; LMULMAX2-RV64I-NEXT: add a1, a1, a2
1080 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
1081 ; LMULMAX2-RV64I-NEXT: li a1, 56
1082 ; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1
1083 ; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0)
1084 ; LMULMAX2-RV64I-NEXT: ret
1086 ; LMULMAX2-RV32F-LABEL: cttz_v4i64:
1087 ; LMULMAX2-RV32F: # %bb.0:
1088 ; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1089 ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0)
1090 ; LMULMAX2-RV32F-NEXT: vrsub.vi v10, v8, 0
1091 ; LMULMAX2-RV32F-NEXT: vand.vv v10, v8, v10
1092 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
1093 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1094 ; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v12, v10
1095 ; LMULMAX2-RV32F-NEXT: fsrm a1
1096 ; LMULMAX2-RV32F-NEXT: vsrl.vi v10, v12, 23
1097 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
1098 ; LMULMAX2-RV32F-NEXT: vzext.vf2 v12, v10
1099 ; LMULMAX2-RV32F-NEXT: li a1, 127
1100 ; LMULMAX2-RV32F-NEXT: vsub.vx v10, v12, a1
1101 ; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0
1102 ; LMULMAX2-RV32F-NEXT: li a1, 64
1103 ; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v10, a1, v0
1104 ; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0)
1105 ; LMULMAX2-RV32F-NEXT: ret
1107 ; LMULMAX2-RV64F-LABEL: cttz_v4i64:
1108 ; LMULMAX2-RV64F: # %bb.0:
1109 ; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1110 ; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0)
1111 ; LMULMAX2-RV64F-NEXT: vrsub.vi v10, v8, 0
1112 ; LMULMAX2-RV64F-NEXT: vand.vv v10, v8, v10
1113 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
1114 ; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1115 ; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v12, v10
1116 ; LMULMAX2-RV64F-NEXT: fsrm a1
1117 ; LMULMAX2-RV64F-NEXT: vsrl.vi v10, v12, 23
1118 ; LMULMAX2-RV64F-NEXT: li a1, 127
1119 ; LMULMAX2-RV64F-NEXT: vwsubu.vx v12, v10, a1
1120 ; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
1121 ; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
1122 ; LMULMAX2-RV64F-NEXT: li a1, 64
1123 ; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v12, a1, v0
1124 ; LMULMAX2-RV64F-NEXT: vse64.v v8, (a0)
1125 ; LMULMAX2-RV64F-NEXT: ret
1127 ; LMULMAX2-RV32D-LABEL: cttz_v4i64:
1128 ; LMULMAX2-RV32D: # %bb.0:
1129 ; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1130 ; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0)
1131 ; LMULMAX2-RV32D-NEXT: vrsub.vi v10, v8, 0
1132 ; LMULMAX2-RV32D-NEXT: vand.vv v10, v8, v10
1133 ; LMULMAX2-RV32D-NEXT: fsrmi a1, 1
1134 ; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v10, v10
1135 ; LMULMAX2-RV32D-NEXT: fsrm a1
1136 ; LMULMAX2-RV32D-NEXT: li a1, 52
1137 ; LMULMAX2-RV32D-NEXT: vsrl.vx v10, v10, a1
1138 ; LMULMAX2-RV32D-NEXT: li a1, 1023
1139 ; LMULMAX2-RV32D-NEXT: vsub.vx v10, v10, a1
1140 ; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0
1141 ; LMULMAX2-RV32D-NEXT: li a1, 64
1142 ; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v10, a1, v0
1143 ; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0)
1144 ; LMULMAX2-RV32D-NEXT: ret
1146 ; LMULMAX2-RV64D-LABEL: cttz_v4i64:
1147 ; LMULMAX2-RV64D: # %bb.0:
1148 ; LMULMAX2-RV64D-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1149 ; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0)
1150 ; LMULMAX2-RV64D-NEXT: vrsub.vi v10, v8, 0
1151 ; LMULMAX2-RV64D-NEXT: vand.vv v10, v8, v10
1152 ; LMULMAX2-RV64D-NEXT: fsrmi a1, 1
1153 ; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v10, v10
1154 ; LMULMAX2-RV64D-NEXT: fsrm a1
1155 ; LMULMAX2-RV64D-NEXT: li a1, 52
1156 ; LMULMAX2-RV64D-NEXT: vsrl.vx v10, v10, a1
1157 ; LMULMAX2-RV64D-NEXT: li a1, 1023
1158 ; LMULMAX2-RV64D-NEXT: vsub.vx v10, v10, a1
1159 ; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0
1160 ; LMULMAX2-RV64D-NEXT: li a1, 64
1161 ; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v10, a1, v0
1162 ; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0)
1163 ; LMULMAX2-RV64D-NEXT: ret
1165 ; LMULMAX8-LABEL: cttz_v4i64:
1166 ; LMULMAX8: # %bb.0:
1167 ; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1168 ; LMULMAX8-NEXT: vle64.v v8, (a0)
1169 ; LMULMAX8-NEXT: vrsub.vi v10, v8, 0
1170 ; LMULMAX8-NEXT: vand.vv v10, v8, v10
1171 ; LMULMAX8-NEXT: fsrmi a1, 1
1172 ; LMULMAX8-NEXT: vfcvt.f.xu.v v10, v10
1173 ; LMULMAX8-NEXT: fsrm a1
1174 ; LMULMAX8-NEXT: li a1, 52
1175 ; LMULMAX8-NEXT: vsrl.vx v10, v10, a1
1176 ; LMULMAX8-NEXT: li a1, 1023
1177 ; LMULMAX8-NEXT: vsub.vx v10, v10, a1
1178 ; LMULMAX8-NEXT: vmseq.vi v0, v8, 0
1179 ; LMULMAX8-NEXT: li a1, 64
1180 ; LMULMAX8-NEXT: vmerge.vxm v8, v10, a1, v0
1181 ; LMULMAX8-NEXT: vse64.v v8, (a0)
1182 ; LMULMAX8-NEXT: ret
1184 ; ZVBB-LABEL: cttz_v4i64:
1186 ; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1187 ; ZVBB-NEXT: vle64.v v8, (a0)
1188 ; ZVBB-NEXT: vctz.v v8, v8
1189 ; ZVBB-NEXT: vse64.v v8, (a0)
1191 %a = load <4 x i64>, ptr %x
1192 %b = load <4 x i64>, ptr %y
1193 %c = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
1194 store <4 x i64> %c, ptr %x
; Declaration of the count-trailing-zeros intrinsic on <4 x i64>; it is the
; callee of the call in @cttz_v4i64 (a <4 x i64> value plus an i1 flag).
1197 declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1)
; cttz_zero_undef_v16i8: loads a <16 x i8> from %x, calls @llvm.cttz.v16i8 on it
; with the i1 flag operand set to true, and stores the result back through %x.
;
; What the expectations below show per configuration:
;  - Common variant: compute (~x & (x - 1)) and run a mask / shift / add
;    bit-counting sequence with 8-bit masks (85, 51), ending in vand.vi 15.
;  - The lmul-max=8 variant: (x & -x) is zero-extended to e16, converted with
;    the widening vfwcvt.f.xu.v, narrowed by a shift of 23, narrowed again to
;    e8, and biased by 127. No vmseq / vmerge fix-up for zero lanes appears.
;  - The Zvbb variant: a single vctz.v.
;
; NOTE(review): %b (the load from %y) has no use in the visible body; confirm
; that this is intentional before removing it.
; The assertion lines are tool-generated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
1199 define void @cttz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
1200 ; CHECK-LABEL: cttz_zero_undef_v16i8:
1202 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
1203 ; CHECK-NEXT: vle8.v v8, (a0)
1204 ; CHECK-NEXT: li a1, 1
1205 ; CHECK-NEXT: vsub.vx v9, v8, a1
1206 ; CHECK-NEXT: vnot.v v8, v8
1207 ; CHECK-NEXT: vand.vv v8, v8, v9
1208 ; CHECK-NEXT: vsrl.vi v9, v8, 1
1209 ; CHECK-NEXT: li a1, 85
1210 ; CHECK-NEXT: vand.vx v9, v9, a1
1211 ; CHECK-NEXT: vsub.vv v8, v8, v9
1212 ; CHECK-NEXT: li a1, 51
1213 ; CHECK-NEXT: vand.vx v9, v8, a1
1214 ; CHECK-NEXT: vsrl.vi v8, v8, 2
1215 ; CHECK-NEXT: vand.vx v8, v8, a1
1216 ; CHECK-NEXT: vadd.vv v8, v9, v8
1217 ; CHECK-NEXT: vsrl.vi v9, v8, 4
1218 ; CHECK-NEXT: vadd.vv v8, v8, v9
1219 ; CHECK-NEXT: vand.vi v8, v8, 15
1220 ; CHECK-NEXT: vse8.v v8, (a0)
1223 ; LMULMAX8-LABEL: cttz_zero_undef_v16i8:
1224 ; LMULMAX8: # %bb.0:
1225 ; LMULMAX8-NEXT: vsetivli zero, 16, e8, m1, ta, ma
1226 ; LMULMAX8-NEXT: vle8.v v8, (a0)
1227 ; LMULMAX8-NEXT: vrsub.vi v9, v8, 0
1228 ; LMULMAX8-NEXT: vand.vv v8, v8, v9
1229 ; LMULMAX8-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1230 ; LMULMAX8-NEXT: vzext.vf2 v10, v8
1231 ; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v10
1232 ; LMULMAX8-NEXT: vnsrl.wi v8, v12, 23
1233 ; LMULMAX8-NEXT: vsetvli zero, zero, e8, m1, ta, ma
1234 ; LMULMAX8-NEXT: vnsrl.wi v10, v8, 0
1235 ; LMULMAX8-NEXT: li a1, 127
1236 ; LMULMAX8-NEXT: vsub.vx v8, v10, a1
1237 ; LMULMAX8-NEXT: vse8.v v8, (a0)
1238 ; LMULMAX8-NEXT: ret
1240 ; ZVBB-LABEL: cttz_zero_undef_v16i8:
1242 ; ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
1243 ; ZVBB-NEXT: vle8.v v8, (a0)
1244 ; ZVBB-NEXT: vctz.v v8, v8
1245 ; ZVBB-NEXT: vse8.v v8, (a0)
1247 %a = load <16 x i8>, ptr %x
1248 %b = load <16 x i8>, ptr %y
1249 %c = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
1250 store <16 x i8> %c, ptr %x
; cttz_zero_undef_v8i16: loads an <8 x i16> from %x, calls @llvm.cttz.v8i16 on it
; with the i1 flag operand set to true, and stores the result back through %x.
;
; What the expectations below show per configuration:
;  - RV32I / RV64I and lmul-max=1 variants: compute (~x & (x - 1)) and run a
;    mask / shift / add bit-counting sequence with 16-bit masks, finishing with
;    a multiply by 257 and a right shift by 8.
;  - RV32F / RV64F / RV32D / RV64D and lmul-max=8 variants: (x & -x) is
;    converted with the widening vfwcvt.f.xu.v, narrowed by a shift of 23, and
;    biased by 127. No vmseq / vmerge fix-up for zero lanes appears.
;  - The Zvbb variant: a single vctz.v.
;
; NOTE(review): %b (the load from %y) has no use in the visible body; confirm
; that this is intentional before removing it.
; The assertion lines are tool-generated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
1254 define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
1255 ; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v8i16:
1256 ; LMULMAX2-RV32I: # %bb.0:
1257 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1258 ; LMULMAX2-RV32I-NEXT: vle16.v v8, (a0)
1259 ; LMULMAX2-RV32I-NEXT: li a1, 1
1260 ; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1
1261 ; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
1262 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
1263 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1
1264 ; LMULMAX2-RV32I-NEXT: lui a1, 5
1265 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
1266 ; LMULMAX2-RV32I-NEXT: vand.vx v9, v9, a1
1267 ; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9
1268 ; LMULMAX2-RV32I-NEXT: lui a1, 3
1269 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
1270 ; LMULMAX2-RV32I-NEXT: vand.vx v9, v8, a1
1271 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
1272 ; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
1273 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8
1274 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4
1275 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9
1276 ; LMULMAX2-RV32I-NEXT: lui a1, 1
1277 ; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
1278 ; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
1279 ; LMULMAX2-RV32I-NEXT: li a1, 257
1280 ; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1
1281 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 8
1282 ; LMULMAX2-RV32I-NEXT: vse16.v v8, (a0)
1283 ; LMULMAX2-RV32I-NEXT: ret
1285 ; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v8i16:
1286 ; LMULMAX2-RV64I: # %bb.0:
1287 ; LMULMAX2-RV64I-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1288 ; LMULMAX2-RV64I-NEXT: vle16.v v8, (a0)
1289 ; LMULMAX2-RV64I-NEXT: li a1, 1
1290 ; LMULMAX2-RV64I-NEXT: vsub.vx v9, v8, a1
1291 ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
1292 ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9
1293 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1
1294 ; LMULMAX2-RV64I-NEXT: lui a1, 5
1295 ; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
1296 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1
1297 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9
1298 ; LMULMAX2-RV64I-NEXT: lui a1, 3
1299 ; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
1300 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1
1301 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
1302 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
1303 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8
1304 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4
1305 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9
1306 ; LMULMAX2-RV64I-NEXT: lui a1, 1
1307 ; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
1308 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
1309 ; LMULMAX2-RV64I-NEXT: li a1, 257
1310 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
1311 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 8
1312 ; LMULMAX2-RV64I-NEXT: vse16.v v8, (a0)
1313 ; LMULMAX2-RV64I-NEXT: ret
1315 ; LMULMAX1-LABEL: cttz_zero_undef_v8i16:
1316 ; LMULMAX1: # %bb.0:
1317 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1318 ; LMULMAX1-NEXT: vle16.v v8, (a0)
1319 ; LMULMAX1-NEXT: li a1, 1
1320 ; LMULMAX1-NEXT: vsub.vx v9, v8, a1
1321 ; LMULMAX1-NEXT: vnot.v v8, v8
1322 ; LMULMAX1-NEXT: vand.vv v8, v8, v9
1323 ; LMULMAX1-NEXT: vsrl.vi v9, v8, 1
1324 ; LMULMAX1-NEXT: lui a1, 5
1325 ; LMULMAX1-NEXT: addi a1, a1, 1365
1326 ; LMULMAX1-NEXT: vand.vx v9, v9, a1
1327 ; LMULMAX1-NEXT: vsub.vv v8, v8, v9
1328 ; LMULMAX1-NEXT: lui a1, 3
1329 ; LMULMAX1-NEXT: addi a1, a1, 819
1330 ; LMULMAX1-NEXT: vand.vx v9, v8, a1
1331 ; LMULMAX1-NEXT: vsrl.vi v8, v8, 2
1332 ; LMULMAX1-NEXT: vand.vx v8, v8, a1
1333 ; LMULMAX1-NEXT: vadd.vv v8, v9, v8
1334 ; LMULMAX1-NEXT: vsrl.vi v9, v8, 4
1335 ; LMULMAX1-NEXT: vadd.vv v8, v8, v9
1336 ; LMULMAX1-NEXT: lui a1, 1
1337 ; LMULMAX1-NEXT: addi a1, a1, -241
1338 ; LMULMAX1-NEXT: vand.vx v8, v8, a1
1339 ; LMULMAX1-NEXT: li a1, 257
1340 ; LMULMAX1-NEXT: vmul.vx v8, v8, a1
1341 ; LMULMAX1-NEXT: vsrl.vi v8, v8, 8
1342 ; LMULMAX1-NEXT: vse16.v v8, (a0)
1343 ; LMULMAX1-NEXT: ret
1345 ; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v8i16:
1346 ; LMULMAX2-RV32F: # %bb.0:
1347 ; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1348 ; LMULMAX2-RV32F-NEXT: vle16.v v8, (a0)
1349 ; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0
1350 ; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v9
1351 ; LMULMAX2-RV32F-NEXT: vfwcvt.f.xu.v v10, v8
1352 ; LMULMAX2-RV32F-NEXT: vnsrl.wi v8, v10, 23
1353 ; LMULMAX2-RV32F-NEXT: li a1, 127
1354 ; LMULMAX2-RV32F-NEXT: vsub.vx v8, v8, a1
1355 ; LMULMAX2-RV32F-NEXT: vse16.v v8, (a0)
1356 ; LMULMAX2-RV32F-NEXT: ret
1358 ; LMULMAX2-RV64F-LABEL: cttz_zero_undef_v8i16:
1359 ; LMULMAX2-RV64F: # %bb.0:
1360 ; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1361 ; LMULMAX2-RV64F-NEXT: vle16.v v8, (a0)
1362 ; LMULMAX2-RV64F-NEXT: vrsub.vi v9, v8, 0
1363 ; LMULMAX2-RV64F-NEXT: vand.vv v8, v8, v9
1364 ; LMULMAX2-RV64F-NEXT: vfwcvt.f.xu.v v10, v8
1365 ; LMULMAX2-RV64F-NEXT: vnsrl.wi v8, v10, 23
1366 ; LMULMAX2-RV64F-NEXT: li a1, 127
1367 ; LMULMAX2-RV64F-NEXT: vsub.vx v8, v8, a1
1368 ; LMULMAX2-RV64F-NEXT: vse16.v v8, (a0)
1369 ; LMULMAX2-RV64F-NEXT: ret
1371 ; LMULMAX2-RV32D-LABEL: cttz_zero_undef_v8i16:
1372 ; LMULMAX2-RV32D: # %bb.0:
1373 ; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1374 ; LMULMAX2-RV32D-NEXT: vle16.v v8, (a0)
1375 ; LMULMAX2-RV32D-NEXT: vrsub.vi v9, v8, 0
1376 ; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v9
1377 ; LMULMAX2-RV32D-NEXT: vfwcvt.f.xu.v v10, v8
1378 ; LMULMAX2-RV32D-NEXT: vnsrl.wi v8, v10, 23
1379 ; LMULMAX2-RV32D-NEXT: li a1, 127
1380 ; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1
1381 ; LMULMAX2-RV32D-NEXT: vse16.v v8, (a0)
1382 ; LMULMAX2-RV32D-NEXT: ret
1384 ; LMULMAX2-RV64D-LABEL: cttz_zero_undef_v8i16:
1385 ; LMULMAX2-RV64D: # %bb.0:
1386 ; LMULMAX2-RV64D-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1387 ; LMULMAX2-RV64D-NEXT: vle16.v v8, (a0)
1388 ; LMULMAX2-RV64D-NEXT: vrsub.vi v9, v8, 0
1389 ; LMULMAX2-RV64D-NEXT: vand.vv v8, v8, v9
1390 ; LMULMAX2-RV64D-NEXT: vfwcvt.f.xu.v v10, v8
1391 ; LMULMAX2-RV64D-NEXT: vnsrl.wi v8, v10, 23
1392 ; LMULMAX2-RV64D-NEXT: li a1, 127
1393 ; LMULMAX2-RV64D-NEXT: vsub.vx v8, v8, a1
1394 ; LMULMAX2-RV64D-NEXT: vse16.v v8, (a0)
1395 ; LMULMAX2-RV64D-NEXT: ret
1397 ; LMULMAX8-LABEL: cttz_zero_undef_v8i16:
1398 ; LMULMAX8: # %bb.0:
1399 ; LMULMAX8-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1400 ; LMULMAX8-NEXT: vle16.v v8, (a0)
1401 ; LMULMAX8-NEXT: vrsub.vi v9, v8, 0
1402 ; LMULMAX8-NEXT: vand.vv v8, v8, v9
1403 ; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v8
1404 ; LMULMAX8-NEXT: vnsrl.wi v8, v10, 23
1405 ; LMULMAX8-NEXT: li a1, 127
1406 ; LMULMAX8-NEXT: vsub.vx v8, v8, a1
1407 ; LMULMAX8-NEXT: vse16.v v8, (a0)
1408 ; LMULMAX8-NEXT: ret
1410 ; ZVBB-LABEL: cttz_zero_undef_v8i16:
1412 ; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1413 ; ZVBB-NEXT: vle16.v v8, (a0)
1414 ; ZVBB-NEXT: vctz.v v8, v8
1415 ; ZVBB-NEXT: vse16.v v8, (a0)
1417 %a = load <8 x i16>, ptr %x
1418 %b = load <8 x i16>, ptr %y
1419 %c = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
1420 store <8 x i16> %c, ptr %x
; cttz_zero_undef_v4i32: loads a <4 x i32> from %x, calls @llvm.cttz.v4i32 on it
; with the i1 flag operand set to true, and stores the result back through %x.
;
; What the expectations below show per configuration:
;  - RV32I / RV64I variants: compute (~x & (x - 1)) and run a
;    mask / shift / add bit-counting sequence with 32-bit masks, finishing with
;    a multiply and a right shift by 24.
;  - RV32F / RV64F variants: (x & -x) is converted in place with
;    vfcvt.f.xu.v (bracketed by fsrmi / fsrm), shifted right by 23, and biased
;    by 127.
;  - RV32D / RV64D and lmul-max=8 variants: widening vfwcvt.f.xu.v, narrowing
;    shift by 52, subtract 1023.
;  - No vmseq / vmerge fix-up for zero lanes appears in any variant.
;  - The Zvbb variant: a single vctz.v.
;
; NOTE(review): %b (the load from %y) has no use in the visible body; confirm
; that this is intentional before removing it.
; The assertion lines are tool-generated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
1424 define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
1425 ; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v4i32:
1426 ; LMULMAX2-RV32I: # %bb.0:
1427 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1428 ; LMULMAX2-RV32I-NEXT: vle32.v v8, (a0)
1429 ; LMULMAX2-RV32I-NEXT: li a1, 1
1430 ; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1
1431 ; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
1432 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
1433 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1
1434 ; LMULMAX2-RV32I-NEXT: lui a1, 349525
1435 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
1436 ; LMULMAX2-RV32I-NEXT: vand.vx v9, v9, a1
1437 ; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9
1438 ; LMULMAX2-RV32I-NEXT: lui a1, 209715
1439 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
1440 ; LMULMAX2-RV32I-NEXT: vand.vx v9, v8, a1
1441 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
1442 ; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
1443 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8
1444 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4
1445 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9
1446 ; LMULMAX2-RV32I-NEXT: lui a1, 61681
1447 ; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
1448 ; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
1449 ; LMULMAX2-RV32I-NEXT: lui a1, 4112
1450 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
1451 ; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1
1452 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 24
1453 ; LMULMAX2-RV32I-NEXT: vse32.v v8, (a0)
1454 ; LMULMAX2-RV32I-NEXT: ret
1456 ; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v4i32:
1457 ; LMULMAX2-RV64I: # %bb.0:
1458 ; LMULMAX2-RV64I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1459 ; LMULMAX2-RV64I-NEXT: vle32.v v8, (a0)
1460 ; LMULMAX2-RV64I-NEXT: li a1, 1
1461 ; LMULMAX2-RV64I-NEXT: vsub.vx v9, v8, a1
1462 ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
1463 ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9
1464 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1
1465 ; LMULMAX2-RV64I-NEXT: lui a1, 349525
1466 ; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
1467 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1
1468 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9
1469 ; LMULMAX2-RV64I-NEXT: lui a1, 209715
1470 ; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
1471 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1
1472 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
1473 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
1474 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8
1475 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4
1476 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9
1477 ; LMULMAX2-RV64I-NEXT: lui a1, 61681
1478 ; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
1479 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
1480 ; LMULMAX2-RV64I-NEXT: lui a1, 4112
1481 ; LMULMAX2-RV64I-NEXT: addi a1, a1, 257
1482 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
1483 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24
1484 ; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
1485 ; LMULMAX2-RV64I-NEXT: ret
1487 ; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v4i32:
1488 ; LMULMAX2-RV32F: # %bb.0:
1489 ; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1490 ; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0)
1491 ; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0
1492 ; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v9
1493 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
1494 ; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v8, v8
1495 ; LMULMAX2-RV32F-NEXT: fsrm a1
1496 ; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v8, 23
1497 ; LMULMAX2-RV32F-NEXT: li a1, 127
1498 ; LMULMAX2-RV32F-NEXT: vsub.vx v8, v8, a1
1499 ; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0)
1500 ; LMULMAX2-RV32F-NEXT: ret
1502 ; LMULMAX2-RV64F-LABEL: cttz_zero_undef_v4i32:
1503 ; LMULMAX2-RV64F: # %bb.0:
1504 ; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1505 ; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0)
1506 ; LMULMAX2-RV64F-NEXT: vrsub.vi v9, v8, 0
1507 ; LMULMAX2-RV64F-NEXT: vand.vv v8, v8, v9
1508 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
1509 ; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v8, v8
1510 ; LMULMAX2-RV64F-NEXT: fsrm a1
1511 ; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v8, 23
1512 ; LMULMAX2-RV64F-NEXT: li a1, 127
1513 ; LMULMAX2-RV64F-NEXT: vsub.vx v8, v8, a1
1514 ; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0)
1515 ; LMULMAX2-RV64F-NEXT: ret
1517 ; LMULMAX2-RV32D-LABEL: cttz_zero_undef_v4i32:
1518 ; LMULMAX2-RV32D: # %bb.0:
1519 ; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1520 ; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0)
1521 ; LMULMAX2-RV32D-NEXT: vrsub.vi v9, v8, 0
1522 ; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v9
1523 ; LMULMAX2-RV32D-NEXT: vfwcvt.f.xu.v v10, v8
1524 ; LMULMAX2-RV32D-NEXT: li a1, 52
1525 ; LMULMAX2-RV32D-NEXT: vnsrl.wx v8, v10, a1
1526 ; LMULMAX2-RV32D-NEXT: li a1, 1023
1527 ; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1
1528 ; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0)
1529 ; LMULMAX2-RV32D-NEXT: ret
1531 ; LMULMAX2-RV64D-LABEL: cttz_zero_undef_v4i32:
1532 ; LMULMAX2-RV64D: # %bb.0:
1533 ; LMULMAX2-RV64D-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1534 ; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0)
1535 ; LMULMAX2-RV64D-NEXT: vrsub.vi v9, v8, 0
1536 ; LMULMAX2-RV64D-NEXT: vand.vv v8, v8, v9
1537 ; LMULMAX2-RV64D-NEXT: vfwcvt.f.xu.v v10, v8
1538 ; LMULMAX2-RV64D-NEXT: li a1, 52
1539 ; LMULMAX2-RV64D-NEXT: vnsrl.wx v8, v10, a1
1540 ; LMULMAX2-RV64D-NEXT: li a1, 1023
1541 ; LMULMAX2-RV64D-NEXT: vsub.vx v8, v8, a1
1542 ; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0)
1543 ; LMULMAX2-RV64D-NEXT: ret
1545 ; LMULMAX8-LABEL: cttz_zero_undef_v4i32:
1546 ; LMULMAX8: # %bb.0:
1547 ; LMULMAX8-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1548 ; LMULMAX8-NEXT: vle32.v v8, (a0)
1549 ; LMULMAX8-NEXT: vrsub.vi v9, v8, 0
1550 ; LMULMAX8-NEXT: vand.vv v8, v8, v9
1551 ; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v8
1552 ; LMULMAX8-NEXT: li a1, 52
1553 ; LMULMAX8-NEXT: vnsrl.wx v8, v10, a1
1554 ; LMULMAX8-NEXT: li a1, 1023
1555 ; LMULMAX8-NEXT: vsub.vx v8, v8, a1
1556 ; LMULMAX8-NEXT: vse32.v v8, (a0)
1557 ; LMULMAX8-NEXT: ret
1559 ; ZVBB-LABEL: cttz_zero_undef_v4i32:
1561 ; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1562 ; ZVBB-NEXT: vle32.v v8, (a0)
1563 ; ZVBB-NEXT: vctz.v v8, v8
1564 ; ZVBB-NEXT: vse32.v v8, (a0)
1566 %a = load <4 x i32>, ptr %x
1567 %b = load <4 x i32>, ptr %y
1568 %c = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
1569 store <4 x i32> %c, ptr %x
; cttz_zero_undef_v2i64 - loads a <2 x i64> from %x, applies @llvm.cttz.v2i64
; with its second operand set to `i1 true` (the intrinsic's zero-is-poison flag
; per the LLVM LangRef), and stores the result back to %x.
;
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of editing
; them by hand. Expected lowering per check prefix:
;   * LMULMAX2-RV32I and LMULMAX2-RV64I - integer only: form ~x & (x - 1), then
;     run a bit-counting sequence using the masks 0x5555.., 0x3333.., 0x0f0f..,
;     a multiply by 0x0101.. and a final shift right by 56. RV32 splats each
;     32-bit constant with vmv.v.x at e32; RV64 assembles the 64-bit constant
;     in a scalar register (slli + add) and uses the .vx forms.
;   * LMULMAX2-RV32F and LMULMAX2-RV64F - isolate the lowest set bit (x & -x),
;     narrow-convert it to f32 between an fsrmi/fsrm pair, shift right by 23,
;     then widen to e64 and subtract 127 (vzext.vf2 + vsub.vx on RV32F,
;     vwsubu.vx on RV64F).
;   * LMULMAX2-RV32D, LMULMAX2-RV64D and LMULMAX8 - same idea at f64:
;     vfcvt.f.xu.v between fsrmi/fsrm, shift right by 52, subtract 1023.
;   * ZVBB - a single vctz.v.
1573 define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
1574 ; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v2i64:
1575 ; LMULMAX2-RV32I: # %bb.0:
1576 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1577 ; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
1578 ; LMULMAX2-RV32I-NEXT: li a1, 1
1579 ; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1
1580 ; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
1581 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
1582 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1
1583 ; LMULMAX2-RV32I-NEXT: lui a1, 349525
1584 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
1585 ; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
1586 ; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
1587 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1588 ; LMULMAX2-RV32I-NEXT: vand.vv v9, v9, v10
1589 ; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9
1590 ; LMULMAX2-RV32I-NEXT: lui a1, 209715
1591 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
1592 ; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
1593 ; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
1594 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1595 ; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v9
1596 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
1597 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
1598 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
1599 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4
1600 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9
1601 ; LMULMAX2-RV32I-NEXT: lui a1, 61681
1602 ; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
1603 ; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
1604 ; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
1605 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1606 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
1607 ; LMULMAX2-RV32I-NEXT: lui a1, 4112
1608 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
1609 ; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
1610 ; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
1611 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1612 ; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v9
1613 ; LMULMAX2-RV32I-NEXT: li a1, 56
1614 ; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
1615 ; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
1616 ; LMULMAX2-RV32I-NEXT: ret
1618 ; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v2i64:
1619 ; LMULMAX2-RV64I: # %bb.0:
1620 ; LMULMAX2-RV64I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1621 ; LMULMAX2-RV64I-NEXT: vle64.v v8, (a0)
1622 ; LMULMAX2-RV64I-NEXT: li a1, 1
1623 ; LMULMAX2-RV64I-NEXT: vsub.vx v9, v8, a1
1624 ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
1625 ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9
1626 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1
1627 ; LMULMAX2-RV64I-NEXT: lui a1, 349525
1628 ; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
1629 ; LMULMAX2-RV64I-NEXT: slli a2, a1, 32
1630 ; LMULMAX2-RV64I-NEXT: add a1, a1, a2
1631 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1
1632 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9
1633 ; LMULMAX2-RV64I-NEXT: lui a1, 209715
1634 ; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
1635 ; LMULMAX2-RV64I-NEXT: slli a2, a1, 32
1636 ; LMULMAX2-RV64I-NEXT: add a1, a1, a2
1637 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1
1638 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
1639 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
1640 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8
1641 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4
1642 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9
1643 ; LMULMAX2-RV64I-NEXT: lui a1, 61681
1644 ; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
1645 ; LMULMAX2-RV64I-NEXT: slli a2, a1, 32
1646 ; LMULMAX2-RV64I-NEXT: add a1, a1, a2
1647 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
1648 ; LMULMAX2-RV64I-NEXT: lui a1, 4112
1649 ; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257
1650 ; LMULMAX2-RV64I-NEXT: slli a2, a1, 32
1651 ; LMULMAX2-RV64I-NEXT: add a1, a1, a2
1652 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
1653 ; LMULMAX2-RV64I-NEXT: li a1, 56
1654 ; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1
1655 ; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0)
1656 ; LMULMAX2-RV64I-NEXT: ret
1658 ; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v2i64:
1659 ; LMULMAX2-RV32F: # %bb.0:
1660 ; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1661 ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0)
1662 ; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0
1663 ; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v9
1664 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
1665 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1666 ; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v9, v8
1667 ; LMULMAX2-RV32F-NEXT: fsrm a1
1668 ; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v9, 23
1669 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
1670 ; LMULMAX2-RV32F-NEXT: vzext.vf2 v9, v8
1671 ; LMULMAX2-RV32F-NEXT: li a1, 127
1672 ; LMULMAX2-RV32F-NEXT: vsub.vx v8, v9, a1
1673 ; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0)
1674 ; LMULMAX2-RV32F-NEXT: ret
1676 ; LMULMAX2-RV64F-LABEL: cttz_zero_undef_v2i64:
1677 ; LMULMAX2-RV64F: # %bb.0:
1678 ; LMULMAX2-RV64F-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1679 ; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0)
1680 ; LMULMAX2-RV64F-NEXT: vrsub.vi v9, v8, 0
1681 ; LMULMAX2-RV64F-NEXT: vand.vv v8, v8, v9
1682 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
1683 ; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1684 ; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v9, v8
1685 ; LMULMAX2-RV64F-NEXT: fsrm a1
1686 ; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v9, 23
1687 ; LMULMAX2-RV64F-NEXT: li a1, 127
1688 ; LMULMAX2-RV64F-NEXT: vwsubu.vx v9, v8, a1
1689 ; LMULMAX2-RV64F-NEXT: vse64.v v9, (a0)
1690 ; LMULMAX2-RV64F-NEXT: ret
1692 ; LMULMAX2-RV32D-LABEL: cttz_zero_undef_v2i64:
1693 ; LMULMAX2-RV32D: # %bb.0:
1694 ; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1695 ; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0)
1696 ; LMULMAX2-RV32D-NEXT: vrsub.vi v9, v8, 0
1697 ; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v9
1698 ; LMULMAX2-RV32D-NEXT: fsrmi a1, 1
1699 ; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8
1700 ; LMULMAX2-RV32D-NEXT: fsrm a1
1701 ; LMULMAX2-RV32D-NEXT: li a1, 52
1702 ; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1
1703 ; LMULMAX2-RV32D-NEXT: li a1, 1023
1704 ; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1
1705 ; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0)
1706 ; LMULMAX2-RV32D-NEXT: ret
1708 ; LMULMAX2-RV64D-LABEL: cttz_zero_undef_v2i64:
1709 ; LMULMAX2-RV64D: # %bb.0:
1710 ; LMULMAX2-RV64D-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1711 ; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0)
1712 ; LMULMAX2-RV64D-NEXT: vrsub.vi v9, v8, 0
1713 ; LMULMAX2-RV64D-NEXT: vand.vv v8, v8, v9
1714 ; LMULMAX2-RV64D-NEXT: fsrmi a1, 1
1715 ; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v8, v8
1716 ; LMULMAX2-RV64D-NEXT: fsrm a1
1717 ; LMULMAX2-RV64D-NEXT: li a1, 52
1718 ; LMULMAX2-RV64D-NEXT: vsrl.vx v8, v8, a1
1719 ; LMULMAX2-RV64D-NEXT: li a1, 1023
1720 ; LMULMAX2-RV64D-NEXT: vsub.vx v8, v8, a1
1721 ; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0)
1722 ; LMULMAX2-RV64D-NEXT: ret
1724 ; LMULMAX8-LABEL: cttz_zero_undef_v2i64:
1725 ; LMULMAX8: # %bb.0:
1726 ; LMULMAX8-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1727 ; LMULMAX8-NEXT: vle64.v v8, (a0)
1728 ; LMULMAX8-NEXT: vrsub.vi v9, v8, 0
1729 ; LMULMAX8-NEXT: vand.vv v8, v8, v9
1730 ; LMULMAX8-NEXT: fsrmi a1, 1
1731 ; LMULMAX8-NEXT: vfcvt.f.xu.v v8, v8
1732 ; LMULMAX8-NEXT: fsrm a1
1733 ; LMULMAX8-NEXT: li a1, 52
1734 ; LMULMAX8-NEXT: vsrl.vx v8, v8, a1
1735 ; LMULMAX8-NEXT: li a1, 1023
1736 ; LMULMAX8-NEXT: vsub.vx v8, v8, a1
1737 ; LMULMAX8-NEXT: vse64.v v8, (a0)
1738 ; LMULMAX8-NEXT: ret
1740 ; ZVBB-LABEL: cttz_zero_undef_v2i64:
1742 ; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1743 ; ZVBB-NEXT: vle64.v v8, (a0)
1744 ; ZVBB-NEXT: vctz.v v8, v8
1745 ; ZVBB-NEXT: vse64.v v8, (a0)
1747 %a = load <2 x i64>, ptr %x
; NOTE(review): %b is not referenced by any later instruction shown here; the
; load from %y appears to be dead - confirm before removing it.
1748 %b = load <2 x i64>, ptr %y
1749 %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
1750 store <2 x i64> %c, ptr %x
; cttz_zero_undef_v32i8 - loads a <32 x i8> from %x, applies @llvm.cttz.v32i8
; with its second operand set to `i1 true` (the intrinsic's zero-is-poison flag
; per the LLVM LangRef), and stores the result back to %x.
;
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of editing
; them by hand. Expected lowering per check prefix:
;   * LMULMAX2 - one m2 operation over all 32 elements: form ~x & (x - 1), then
;     a bit-counting sequence with the byte masks 85 (0x55), 51 (0x33) and
;     15 (0x0f).
;   * LMULMAX1 - the same integer sequence applied separately to the two
;     16-element halves at (a0) and (a0 + 16), sharing the scalar constants in
;     a2/a3/a4.
;   * LMULMAX8 - isolate the lowest set bit (x & -x), zero-extend to e16,
;     widening-convert to f32 (vfwcvt.f.xu.v), narrow-shift right by 23, narrow
;     again to e8 and subtract 127.
;   * ZVBB - a single vctz.v.
1754 define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
1755 ; LMULMAX2-LABEL: cttz_zero_undef_v32i8:
1756 ; LMULMAX2: # %bb.0:
1757 ; LMULMAX2-NEXT: li a1, 32
1758 ; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
1759 ; LMULMAX2-NEXT: vle8.v v8, (a0)
1760 ; LMULMAX2-NEXT: li a1, 1
1761 ; LMULMAX2-NEXT: vsub.vx v10, v8, a1
1762 ; LMULMAX2-NEXT: vnot.v v8, v8
1763 ; LMULMAX2-NEXT: vand.vv v8, v8, v10
1764 ; LMULMAX2-NEXT: vsrl.vi v10, v8, 1
1765 ; LMULMAX2-NEXT: li a1, 85
1766 ; LMULMAX2-NEXT: vand.vx v10, v10, a1
1767 ; LMULMAX2-NEXT: vsub.vv v8, v8, v10
1768 ; LMULMAX2-NEXT: li a1, 51
1769 ; LMULMAX2-NEXT: vand.vx v10, v8, a1
1770 ; LMULMAX2-NEXT: vsrl.vi v8, v8, 2
1771 ; LMULMAX2-NEXT: vand.vx v8, v8, a1
1772 ; LMULMAX2-NEXT: vadd.vv v8, v10, v8
1773 ; LMULMAX2-NEXT: vsrl.vi v10, v8, 4
1774 ; LMULMAX2-NEXT: vadd.vv v8, v8, v10
1775 ; LMULMAX2-NEXT: vand.vi v8, v8, 15
1776 ; LMULMAX2-NEXT: vse8.v v8, (a0)
1777 ; LMULMAX2-NEXT: ret
1779 ; LMULMAX1-LABEL: cttz_zero_undef_v32i8:
1780 ; LMULMAX1: # %bb.0:
1781 ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
1782 ; LMULMAX1-NEXT: addi a1, a0, 16
1783 ; LMULMAX1-NEXT: vle8.v v8, (a1)
1784 ; LMULMAX1-NEXT: vle8.v v9, (a0)
1785 ; LMULMAX1-NEXT: li a2, 1
1786 ; LMULMAX1-NEXT: vsub.vx v10, v8, a2
1787 ; LMULMAX1-NEXT: vnot.v v8, v8
1788 ; LMULMAX1-NEXT: vand.vv v8, v8, v10
1789 ; LMULMAX1-NEXT: vsrl.vi v10, v8, 1
1790 ; LMULMAX1-NEXT: li a3, 85
1791 ; LMULMAX1-NEXT: vand.vx v10, v10, a3
1792 ; LMULMAX1-NEXT: vsub.vv v8, v8, v10
1793 ; LMULMAX1-NEXT: li a4, 51
1794 ; LMULMAX1-NEXT: vand.vx v10, v8, a4
1795 ; LMULMAX1-NEXT: vsrl.vi v8, v8, 2
1796 ; LMULMAX1-NEXT: vand.vx v8, v8, a4
1797 ; LMULMAX1-NEXT: vadd.vv v8, v10, v8
1798 ; LMULMAX1-NEXT: vsrl.vi v10, v8, 4
1799 ; LMULMAX1-NEXT: vadd.vv v8, v8, v10
1800 ; LMULMAX1-NEXT: vand.vi v8, v8, 15
1801 ; LMULMAX1-NEXT: vsub.vx v10, v9, a2
1802 ; LMULMAX1-NEXT: vnot.v v9, v9
1803 ; LMULMAX1-NEXT: vand.vv v9, v9, v10
1804 ; LMULMAX1-NEXT: vsrl.vi v10, v9, 1
1805 ; LMULMAX1-NEXT: vand.vx v10, v10, a3
1806 ; LMULMAX1-NEXT: vsub.vv v9, v9, v10
1807 ; LMULMAX1-NEXT: vand.vx v10, v9, a4
1808 ; LMULMAX1-NEXT: vsrl.vi v9, v9, 2
1809 ; LMULMAX1-NEXT: vand.vx v9, v9, a4
1810 ; LMULMAX1-NEXT: vadd.vv v9, v10, v9
1811 ; LMULMAX1-NEXT: vsrl.vi v10, v9, 4
1812 ; LMULMAX1-NEXT: vadd.vv v9, v9, v10
1813 ; LMULMAX1-NEXT: vand.vi v9, v9, 15
1814 ; LMULMAX1-NEXT: vse8.v v9, (a0)
1815 ; LMULMAX1-NEXT: vse8.v v8, (a1)
1816 ; LMULMAX1-NEXT: ret
1818 ; LMULMAX8-LABEL: cttz_zero_undef_v32i8:
1819 ; LMULMAX8: # %bb.0:
1820 ; LMULMAX8-NEXT: li a1, 32
1821 ; LMULMAX8-NEXT: vsetvli zero, a1, e8, m2, ta, ma
1822 ; LMULMAX8-NEXT: vle8.v v8, (a0)
1823 ; LMULMAX8-NEXT: vrsub.vi v10, v8, 0
1824 ; LMULMAX8-NEXT: vand.vv v8, v8, v10
1825 ; LMULMAX8-NEXT: vsetvli zero, zero, e16, m4, ta, ma
1826 ; LMULMAX8-NEXT: vzext.vf2 v12, v8
1827 ; LMULMAX8-NEXT: vfwcvt.f.xu.v v16, v12
1828 ; LMULMAX8-NEXT: vnsrl.wi v8, v16, 23
1829 ; LMULMAX8-NEXT: vsetvli zero, zero, e8, m2, ta, ma
1830 ; LMULMAX8-NEXT: vnsrl.wi v12, v8, 0
1831 ; LMULMAX8-NEXT: li a1, 127
1832 ; LMULMAX8-NEXT: vsub.vx v8, v12, a1
1833 ; LMULMAX8-NEXT: vse8.v v8, (a0)
1834 ; LMULMAX8-NEXT: ret
1836 ; ZVBB-LABEL: cttz_zero_undef_v32i8:
1838 ; ZVBB-NEXT: li a1, 32
1839 ; ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma
1840 ; ZVBB-NEXT: vle8.v v8, (a0)
1841 ; ZVBB-NEXT: vctz.v v8, v8
1842 ; ZVBB-NEXT: vse8.v v8, (a0)
1844 %a = load <32 x i8>, ptr %x
; NOTE(review): %b is not referenced by any later instruction shown here; the
; load from %y appears to be dead - confirm before removing it.
1845 %b = load <32 x i8>, ptr %y
1846 %c = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
1847 store <32 x i8> %c, ptr %x
; cttz_zero_undef_v16i16 - loads a <16 x i16> from %x, applies
; @llvm.cttz.v16i16 with its second operand set to `i1 true` (the intrinsic's
; zero-is-poison flag per the LLVM LangRef), and stores the result back to %x.
;
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of editing
; them by hand. Expected lowering per check prefix:
;   * LMULMAX2 - one m2 operation over all 16 elements: form ~x & (x - 1), then
;     a bit-counting sequence with the 16-bit masks 0x5555 (lui 5 + 1365),
;     0x3333 (lui 3 + 819), 0x0f0f (lui 1 - 241), a multiply by 257 (0x0101)
;     and a final shift right by 8.
;   * LMULMAX1 - the same integer sequence applied separately to the two
;     8-element halves at (a0) and (a0 + 16), sharing the scalar constants in
;     a2..a6.
;   * LMULMAX8 - isolate the lowest set bit (x & -x), widening-convert to f32
;     (vfwcvt.f.xu.v), narrow-shift right by 23 and subtract 127.
;   * ZVBB - a single vctz.v.
1851 define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
1852 ; LMULMAX2-LABEL: cttz_zero_undef_v16i16:
1853 ; LMULMAX2: # %bb.0:
1854 ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1855 ; LMULMAX2-NEXT: vle16.v v8, (a0)
1856 ; LMULMAX2-NEXT: li a1, 1
1857 ; LMULMAX2-NEXT: vsub.vx v10, v8, a1
1858 ; LMULMAX2-NEXT: vnot.v v8, v8
1859 ; LMULMAX2-NEXT: vand.vv v8, v8, v10
1860 ; LMULMAX2-NEXT: vsrl.vi v10, v8, 1
1861 ; LMULMAX2-NEXT: lui a1, 5
1862 ; LMULMAX2-NEXT: addi a1, a1, 1365
1863 ; LMULMAX2-NEXT: vand.vx v10, v10, a1
1864 ; LMULMAX2-NEXT: vsub.vv v8, v8, v10
1865 ; LMULMAX2-NEXT: lui a1, 3
1866 ; LMULMAX2-NEXT: addi a1, a1, 819
1867 ; LMULMAX2-NEXT: vand.vx v10, v8, a1
1868 ; LMULMAX2-NEXT: vsrl.vi v8, v8, 2
1869 ; LMULMAX2-NEXT: vand.vx v8, v8, a1
1870 ; LMULMAX2-NEXT: vadd.vv v8, v10, v8
1871 ; LMULMAX2-NEXT: vsrl.vi v10, v8, 4
1872 ; LMULMAX2-NEXT: vadd.vv v8, v8, v10
1873 ; LMULMAX2-NEXT: lui a1, 1
1874 ; LMULMAX2-NEXT: addi a1, a1, -241
1875 ; LMULMAX2-NEXT: vand.vx v8, v8, a1
1876 ; LMULMAX2-NEXT: li a1, 257
1877 ; LMULMAX2-NEXT: vmul.vx v8, v8, a1
1878 ; LMULMAX2-NEXT: vsrl.vi v8, v8, 8
1879 ; LMULMAX2-NEXT: vse16.v v8, (a0)
1880 ; LMULMAX2-NEXT: ret
1882 ; LMULMAX1-LABEL: cttz_zero_undef_v16i16:
1883 ; LMULMAX1: # %bb.0:
1884 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1885 ; LMULMAX1-NEXT: addi a1, a0, 16
1886 ; LMULMAX1-NEXT: vle16.v v8, (a1)
1887 ; LMULMAX1-NEXT: vle16.v v9, (a0)
1888 ; LMULMAX1-NEXT: li a2, 1
1889 ; LMULMAX1-NEXT: vsub.vx v10, v8, a2
1890 ; LMULMAX1-NEXT: vnot.v v8, v8
1891 ; LMULMAX1-NEXT: vand.vv v8, v8, v10
1892 ; LMULMAX1-NEXT: vsrl.vi v10, v8, 1
1893 ; LMULMAX1-NEXT: lui a3, 5
1894 ; LMULMAX1-NEXT: addi a3, a3, 1365
1895 ; LMULMAX1-NEXT: vand.vx v10, v10, a3
1896 ; LMULMAX1-NEXT: vsub.vv v8, v8, v10
1897 ; LMULMAX1-NEXT: lui a4, 3
1898 ; LMULMAX1-NEXT: addi a4, a4, 819
1899 ; LMULMAX1-NEXT: vand.vx v10, v8, a4
1900 ; LMULMAX1-NEXT: vsrl.vi v8, v8, 2
1901 ; LMULMAX1-NEXT: vand.vx v8, v8, a4
1902 ; LMULMAX1-NEXT: vadd.vv v8, v10, v8
1903 ; LMULMAX1-NEXT: vsrl.vi v10, v8, 4
1904 ; LMULMAX1-NEXT: vadd.vv v8, v8, v10
1905 ; LMULMAX1-NEXT: lui a5, 1
1906 ; LMULMAX1-NEXT: addi a5, a5, -241
1907 ; LMULMAX1-NEXT: vand.vx v8, v8, a5
1908 ; LMULMAX1-NEXT: li a6, 257
1909 ; LMULMAX1-NEXT: vmul.vx v8, v8, a6
1910 ; LMULMAX1-NEXT: vsrl.vi v8, v8, 8
1911 ; LMULMAX1-NEXT: vsub.vx v10, v9, a2
1912 ; LMULMAX1-NEXT: vnot.v v9, v9
1913 ; LMULMAX1-NEXT: vand.vv v9, v9, v10
1914 ; LMULMAX1-NEXT: vsrl.vi v10, v9, 1
1915 ; LMULMAX1-NEXT: vand.vx v10, v10, a3
1916 ; LMULMAX1-NEXT: vsub.vv v9, v9, v10
1917 ; LMULMAX1-NEXT: vand.vx v10, v9, a4
1918 ; LMULMAX1-NEXT: vsrl.vi v9, v9, 2
1919 ; LMULMAX1-NEXT: vand.vx v9, v9, a4
1920 ; LMULMAX1-NEXT: vadd.vv v9, v10, v9
1921 ; LMULMAX1-NEXT: vsrl.vi v10, v9, 4
1922 ; LMULMAX1-NEXT: vadd.vv v9, v9, v10
1923 ; LMULMAX1-NEXT: vand.vx v9, v9, a5
1924 ; LMULMAX1-NEXT: vmul.vx v9, v9, a6
1925 ; LMULMAX1-NEXT: vsrl.vi v9, v9, 8
1926 ; LMULMAX1-NEXT: vse16.v v9, (a0)
1927 ; LMULMAX1-NEXT: vse16.v v8, (a1)
1928 ; LMULMAX1-NEXT: ret
1930 ; LMULMAX8-LABEL: cttz_zero_undef_v16i16:
1931 ; LMULMAX8: # %bb.0:
1932 ; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1933 ; LMULMAX8-NEXT: vle16.v v8, (a0)
1934 ; LMULMAX8-NEXT: vrsub.vi v10, v8, 0
1935 ; LMULMAX8-NEXT: vand.vv v8, v8, v10
1936 ; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v8
1937 ; LMULMAX8-NEXT: vnsrl.wi v8, v12, 23
1938 ; LMULMAX8-NEXT: li a1, 127
1939 ; LMULMAX8-NEXT: vsub.vx v8, v8, a1
1940 ; LMULMAX8-NEXT: vse16.v v8, (a0)
1941 ; LMULMAX8-NEXT: ret
1943 ; ZVBB-LABEL: cttz_zero_undef_v16i16:
1945 ; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1946 ; ZVBB-NEXT: vle16.v v8, (a0)
1947 ; ZVBB-NEXT: vctz.v v8, v8
1948 ; ZVBB-NEXT: vse16.v v8, (a0)
1950 %a = load <16 x i16>, ptr %x
; NOTE(review): %b is not referenced by any later instruction shown here; the
; load from %y appears to be dead - confirm before removing it.
1951 %b = load <16 x i16>, ptr %y
1952 %c = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
1953 store <16 x i16> %c, ptr %x
; cttz_zero_undef_v8i32 - loads an <8 x i32> from %x, applies @llvm.cttz.v8i32
; with its second operand set to `i1 true` (the intrinsic's zero-is-poison flag
; per the LLVM LangRef), and stores the result back to %x.
;
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of editing
; them by hand. Expected lowering per check prefix:
;   * LMULMAX2-RV32I and LMULMAX2-RV64I - integer only: form ~x & (x - 1), then
;     a bit-counting sequence with the 32-bit masks 0x55555555, 0x33333333,
;     0x0f0f0f0f, a multiply by 0x01010101 and a final shift right by 24.
;   * LMULMAX2-RV32F, -RV64F, -RV32D and -RV64D - isolate the lowest set bit
;     (x & -x), convert in place to f32 (vfcvt.f.xu.v) between an fsrmi/fsrm
;     pair, shift right by 23 and subtract 127.
;   * LMULMAX8 - isolate the lowest set bit, widening-convert to f64
;     (vfwcvt.f.xu.v, no fsrmi/fsrm pair), narrow-shift right by 52 and
;     subtract 1023.
;   * ZVBB - a single vctz.v.
1957 define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
1958 ; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v8i32:
1959 ; LMULMAX2-RV32I: # %bb.0:
1960 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1961 ; LMULMAX2-RV32I-NEXT: vle32.v v8, (a0)
1962 ; LMULMAX2-RV32I-NEXT: li a1, 1
1963 ; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1
1964 ; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
1965 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
1966 ; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
1967 ; LMULMAX2-RV32I-NEXT: lui a1, 349525
1968 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
1969 ; LMULMAX2-RV32I-NEXT: vand.vx v10, v10, a1
1970 ; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10
1971 ; LMULMAX2-RV32I-NEXT: lui a1, 209715
1972 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
1973 ; LMULMAX2-RV32I-NEXT: vand.vx v10, v8, a1
1974 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
1975 ; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
1976 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
1977 ; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4
1978 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10
1979 ; LMULMAX2-RV32I-NEXT: lui a1, 61681
1980 ; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
1981 ; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
1982 ; LMULMAX2-RV32I-NEXT: lui a1, 4112
1983 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
1984 ; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1
1985 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 24
1986 ; LMULMAX2-RV32I-NEXT: vse32.v v8, (a0)
1987 ; LMULMAX2-RV32I-NEXT: ret
1989 ; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v8i32:
1990 ; LMULMAX2-RV64I: # %bb.0:
1991 ; LMULMAX2-RV64I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1992 ; LMULMAX2-RV64I-NEXT: vle32.v v8, (a0)
1993 ; LMULMAX2-RV64I-NEXT: li a1, 1
1994 ; LMULMAX2-RV64I-NEXT: vsub.vx v10, v8, a1
1995 ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
1996 ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v10
1997 ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1
1998 ; LMULMAX2-RV64I-NEXT: lui a1, 349525
1999 ; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
2000 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1
2001 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10
2002 ; LMULMAX2-RV64I-NEXT: lui a1, 209715
2003 ; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
2004 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1
2005 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
2006 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
2007 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8
2008 ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4
2009 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10
2010 ; LMULMAX2-RV64I-NEXT: lui a1, 61681
2011 ; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
2012 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
2013 ; LMULMAX2-RV64I-NEXT: lui a1, 4112
2014 ; LMULMAX2-RV64I-NEXT: addi a1, a1, 257
2015 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
2016 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24
2017 ; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
2018 ; LMULMAX2-RV64I-NEXT: ret
2020 ; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v8i32:
2021 ; LMULMAX2-RV32F: # %bb.0:
2022 ; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2023 ; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0)
2024 ; LMULMAX2-RV32F-NEXT: vrsub.vi v10, v8, 0
2025 ; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v10
2026 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
2027 ; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v8, v8
2028 ; LMULMAX2-RV32F-NEXT: fsrm a1
2029 ; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v8, 23
2030 ; LMULMAX2-RV32F-NEXT: li a1, 127
2031 ; LMULMAX2-RV32F-NEXT: vsub.vx v8, v8, a1
2032 ; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0)
2033 ; LMULMAX2-RV32F-NEXT: ret
2035 ; LMULMAX2-RV64F-LABEL: cttz_zero_undef_v8i32:
2036 ; LMULMAX2-RV64F: # %bb.0:
2037 ; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2038 ; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0)
2039 ; LMULMAX2-RV64F-NEXT: vrsub.vi v10, v8, 0
2040 ; LMULMAX2-RV64F-NEXT: vand.vv v8, v8, v10
2041 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
2042 ; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v8, v8
2043 ; LMULMAX2-RV64F-NEXT: fsrm a1
2044 ; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v8, 23
2045 ; LMULMAX2-RV64F-NEXT: li a1, 127
2046 ; LMULMAX2-RV64F-NEXT: vsub.vx v8, v8, a1
2047 ; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0)
2048 ; LMULMAX2-RV64F-NEXT: ret
2050 ; LMULMAX2-RV32D-LABEL: cttz_zero_undef_v8i32:
2051 ; LMULMAX2-RV32D: # %bb.0:
2052 ; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2053 ; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0)
2054 ; LMULMAX2-RV32D-NEXT: vrsub.vi v10, v8, 0
2055 ; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v10
2056 ; LMULMAX2-RV32D-NEXT: fsrmi a1, 1
2057 ; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8
2058 ; LMULMAX2-RV32D-NEXT: fsrm a1
2059 ; LMULMAX2-RV32D-NEXT: vsrl.vi v8, v8, 23
2060 ; LMULMAX2-RV32D-NEXT: li a1, 127
2061 ; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1
2062 ; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0)
2063 ; LMULMAX2-RV32D-NEXT: ret
2065 ; LMULMAX2-RV64D-LABEL: cttz_zero_undef_v8i32:
2066 ; LMULMAX2-RV64D: # %bb.0:
2067 ; LMULMAX2-RV64D-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2068 ; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0)
2069 ; LMULMAX2-RV64D-NEXT: vrsub.vi v10, v8, 0
2070 ; LMULMAX2-RV64D-NEXT: vand.vv v8, v8, v10
2071 ; LMULMAX2-RV64D-NEXT: fsrmi a1, 1
2072 ; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v8, v8
2073 ; LMULMAX2-RV64D-NEXT: fsrm a1
2074 ; LMULMAX2-RV64D-NEXT: vsrl.vi v8, v8, 23
2075 ; LMULMAX2-RV64D-NEXT: li a1, 127
2076 ; LMULMAX2-RV64D-NEXT: vsub.vx v8, v8, a1
2077 ; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0)
2078 ; LMULMAX2-RV64D-NEXT: ret
2080 ; LMULMAX8-LABEL: cttz_zero_undef_v8i32:
2081 ; LMULMAX8: # %bb.0:
2082 ; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2083 ; LMULMAX8-NEXT: vle32.v v8, (a0)
2084 ; LMULMAX8-NEXT: vrsub.vi v10, v8, 0
2085 ; LMULMAX8-NEXT: vand.vv v8, v8, v10
2086 ; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v8
2087 ; LMULMAX8-NEXT: li a1, 52
2088 ; LMULMAX8-NEXT: vnsrl.wx v8, v12, a1
2089 ; LMULMAX8-NEXT: li a1, 1023
2090 ; LMULMAX8-NEXT: vsub.vx v8, v8, a1
2091 ; LMULMAX8-NEXT: vse32.v v8, (a0)
2092 ; LMULMAX8-NEXT: ret
2094 ; ZVBB-LABEL: cttz_zero_undef_v8i32:
2096 ; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2097 ; ZVBB-NEXT: vle32.v v8, (a0)
2098 ; ZVBB-NEXT: vctz.v v8, v8
2099 ; ZVBB-NEXT: vse32.v v8, (a0)
2101 %a = load <8 x i32>, ptr %x
; NOTE(review): %b is not referenced by any later instruction shown here; the
; load from %y appears to be dead - confirm before removing it.
2102 %b = load <8 x i32>, ptr %y
2103 %c = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
2104 store <8 x i32> %c, ptr %x
; cttz_zero_undef_v4i64 - loads a <4 x i64> from %x, applies @llvm.cttz.v4i64
; with its second operand set to `i1 true` (the intrinsic's zero-is-poison flag
; per the LLVM LangRef), and stores the result back to %x.
;
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of editing
; them by hand. Expected lowering per check prefix (all at m2, 4 elements):
;   * LMULMAX2-RV32I and LMULMAX2-RV64I - integer only: form ~x & (x - 1), then
;     run a bit-counting sequence using the masks 0x5555.., 0x3333.., 0x0f0f..,
;     a multiply by 0x0101.. and a final shift right by 56. RV32 splats each
;     32-bit constant with vmv.v.x at e32; RV64 assembles the 64-bit constant
;     in a scalar register (slli + add) and uses the .vx forms.
;   * LMULMAX2-RV32F and LMULMAX2-RV64F - isolate the lowest set bit (x & -x),
;     narrow-convert it to f32 (e32, m1) between an fsrmi/fsrm pair, shift
;     right by 23, then widen to e64 and subtract 127 (vzext.vf2 + vsub.vx on
;     RV32F, vwsubu.vx on RV64F).
;   * LMULMAX2-RV32D, LMULMAX2-RV64D and LMULMAX8 - same idea at f64:
;     vfcvt.f.xu.v between fsrmi/fsrm, shift right by 52, subtract 1023.
;   * ZVBB - a single vctz.v.
2108 define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
2109 ; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v4i64:
2110 ; LMULMAX2-RV32I: # %bb.0:
2111 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2112 ; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
2113 ; LMULMAX2-RV32I-NEXT: li a1, 1
2114 ; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1
2115 ; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
2116 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
2117 ; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
2118 ; LMULMAX2-RV32I-NEXT: lui a1, 349525
2119 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
2120 ; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
2121 ; LMULMAX2-RV32I-NEXT: vmv.v.x v12, a1
2122 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2123 ; LMULMAX2-RV32I-NEXT: vand.vv v10, v10, v12
2124 ; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10
2125 ; LMULMAX2-RV32I-NEXT: lui a1, 209715
2126 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
2127 ; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
2128 ; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
2129 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2130 ; LMULMAX2-RV32I-NEXT: vand.vv v12, v8, v10
2131 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
2132 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
2133 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v12, v8
2134 ; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4
2135 ; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10
2136 ; LMULMAX2-RV32I-NEXT: lui a1, 61681
2137 ; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
2138 ; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
2139 ; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
2140 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2141 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
2142 ; LMULMAX2-RV32I-NEXT: lui a1, 4112
2143 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
2144 ; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
2145 ; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
2146 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2147 ; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10
2148 ; LMULMAX2-RV32I-NEXT: li a1, 56
2149 ; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
2150 ; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
2151 ; LMULMAX2-RV32I-NEXT: ret
2153 ; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v4i64:
2154 ; LMULMAX2-RV64I: # %bb.0:
2155 ; LMULMAX2-RV64I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2156 ; LMULMAX2-RV64I-NEXT: vle64.v v8, (a0)
2157 ; LMULMAX2-RV64I-NEXT: li a1, 1
2158 ; LMULMAX2-RV64I-NEXT: vsub.vx v10, v8, a1
2159 ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
2160 ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v10
2161 ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1
2162 ; LMULMAX2-RV64I-NEXT: lui a1, 349525
2163 ; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
2164 ; LMULMAX2-RV64I-NEXT: slli a2, a1, 32
2165 ; LMULMAX2-RV64I-NEXT: add a1, a1, a2
2166 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1
2167 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10
2168 ; LMULMAX2-RV64I-NEXT: lui a1, 209715
2169 ; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
2170 ; LMULMAX2-RV64I-NEXT: slli a2, a1, 32
2171 ; LMULMAX2-RV64I-NEXT: add a1, a1, a2
2172 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1
2173 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
2174 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
2175 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8
2176 ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4
2177 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10
2178 ; LMULMAX2-RV64I-NEXT: lui a1, 61681
2179 ; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
2180 ; LMULMAX2-RV64I-NEXT: slli a2, a1, 32
2181 ; LMULMAX2-RV64I-NEXT: add a1, a1, a2
2182 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
2183 ; LMULMAX2-RV64I-NEXT: lui a1, 4112
2184 ; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257
2185 ; LMULMAX2-RV64I-NEXT: slli a2, a1, 32
2186 ; LMULMAX2-RV64I-NEXT: add a1, a1, a2
2187 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
2188 ; LMULMAX2-RV64I-NEXT: li a1, 56
2189 ; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1
2190 ; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0)
2191 ; LMULMAX2-RV64I-NEXT: ret
2193 ; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v4i64:
2194 ; LMULMAX2-RV32F: # %bb.0:
2195 ; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2196 ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0)
2197 ; LMULMAX2-RV32F-NEXT: vrsub.vi v10, v8, 0
2198 ; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v10
2199 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
2200 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2201 ; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8
2202 ; LMULMAX2-RV32F-NEXT: fsrm a1
2203 ; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23
2204 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
2205 ; LMULMAX2-RV32F-NEXT: vzext.vf2 v10, v8
2206 ; LMULMAX2-RV32F-NEXT: li a1, 127
2207 ; LMULMAX2-RV32F-NEXT: vsub.vx v8, v10, a1
2208 ; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0)
2209 ; LMULMAX2-RV32F-NEXT: ret
2211 ; LMULMAX2-RV64F-LABEL: cttz_zero_undef_v4i64:
2212 ; LMULMAX2-RV64F: # %bb.0:
2213 ; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2214 ; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0)
2215 ; LMULMAX2-RV64F-NEXT: vrsub.vi v10, v8, 0
2216 ; LMULMAX2-RV64F-NEXT: vand.vv v8, v8, v10
2217 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
2218 ; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2219 ; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8
2220 ; LMULMAX2-RV64F-NEXT: fsrm a1
2221 ; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v10, 23
2222 ; LMULMAX2-RV64F-NEXT: li a1, 127
2223 ; LMULMAX2-RV64F-NEXT: vwsubu.vx v10, v8, a1
2224 ; LMULMAX2-RV64F-NEXT: vse64.v v10, (a0)
2225 ; LMULMAX2-RV64F-NEXT: ret
2227 ; LMULMAX2-RV32D-LABEL: cttz_zero_undef_v4i64:
2228 ; LMULMAX2-RV32D: # %bb.0:
2229 ; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2230 ; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0)
2231 ; LMULMAX2-RV32D-NEXT: vrsub.vi v10, v8, 0
2232 ; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v10
2233 ; LMULMAX2-RV32D-NEXT: fsrmi a1, 1
2234 ; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8
2235 ; LMULMAX2-RV32D-NEXT: fsrm a1
2236 ; LMULMAX2-RV32D-NEXT: li a1, 52
2237 ; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1
2238 ; LMULMAX2-RV32D-NEXT: li a1, 1023
2239 ; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1
2240 ; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0)
2241 ; LMULMAX2-RV32D-NEXT: ret
2243 ; LMULMAX2-RV64D-LABEL: cttz_zero_undef_v4i64:
2244 ; LMULMAX2-RV64D: # %bb.0:
2245 ; LMULMAX2-RV64D-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2246 ; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0)
2247 ; LMULMAX2-RV64D-NEXT: vrsub.vi v10, v8, 0
2248 ; LMULMAX2-RV64D-NEXT: vand.vv v8, v8, v10
2249 ; LMULMAX2-RV64D-NEXT: fsrmi a1, 1
2250 ; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v8, v8
2251 ; LMULMAX2-RV64D-NEXT: fsrm a1
2252 ; LMULMAX2-RV64D-NEXT: li a1, 52
2253 ; LMULMAX2-RV64D-NEXT: vsrl.vx v8, v8, a1
2254 ; LMULMAX2-RV64D-NEXT: li a1, 1023
2255 ; LMULMAX2-RV64D-NEXT: vsub.vx v8, v8, a1
2256 ; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0)
2257 ; LMULMAX2-RV64D-NEXT: ret
2259 ; LMULMAX8-LABEL: cttz_zero_undef_v4i64:
2260 ; LMULMAX8: # %bb.0:
2261 ; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2262 ; LMULMAX8-NEXT: vle64.v v8, (a0)
2263 ; LMULMAX8-NEXT: vrsub.vi v10, v8, 0
2264 ; LMULMAX8-NEXT: vand.vv v8, v8, v10
2265 ; LMULMAX8-NEXT: fsrmi a1, 1
2266 ; LMULMAX8-NEXT: vfcvt.f.xu.v v8, v8
2267 ; LMULMAX8-NEXT: fsrm a1
2268 ; LMULMAX8-NEXT: li a1, 52
2269 ; LMULMAX8-NEXT: vsrl.vx v8, v8, a1
2270 ; LMULMAX8-NEXT: li a1, 1023
2271 ; LMULMAX8-NEXT: vsub.vx v8, v8, a1
2272 ; LMULMAX8-NEXT: vse64.v v8, (a0)
2273 ; LMULMAX8-NEXT: ret
2275 ; ZVBB-LABEL: cttz_zero_undef_v4i64:
2277 ; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2278 ; ZVBB-NEXT: vle64.v v8, (a0)
2279 ; ZVBB-NEXT: vctz.v v8, v8
2280 ; ZVBB-NEXT: vse64.v v8, (a0)
2282 %a = load <4 x i64>, ptr %x
; NOTE(review): %b is not referenced by any later instruction shown here; the
; load from %y appears to be dead - confirm before removing it.
2283 %b = load <4 x i64>, ptr %y
2284 %c = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
2285 store <4 x i64> %c, ptr %x
2288 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
2289 ; LMULMAX1-RV32: {{.*}}
2290 ; LMULMAX1-RV64: {{.*}}
2291 ; LMULMAX2-RV32: {{.*}}
2292 ; LMULMAX2-RV64: {{.*}}