; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=riscv32 -mattr=+v < %s | FileCheck %s -check-prefix=RV32
; RUN: llc -mtriple=riscv64 -mattr=+v < %s | FileCheck %s -check-prefix=RV64

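; With a vscale_range attribute the element count fits in 16 bits, so the
; lowering works at e16: vid.v/vmadd.vx materialize EC - index per lane,
; vmerge keeps that value only where the source is nonzero, vredmaxu.vs
; reduces it, and subtracting the maximum from EC recovers the first
; nonzero index.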
define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
; RV32-LABEL: ctz_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    srli a0, a0, 1
; RV32-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a0
; RV32-NEXT:    vid.v v11
; RV32-NEXT:    li a1, -1
; RV32-NEXT:    vmadd.vx v11, a1, v10
; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; RV32-NEXT:    vmsne.vi v0, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    vmerge.vvm v8, v8, v11, v0
; RV32-NEXT:    vredmaxu.vs v8, v8, v8
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    sub a0, a0, a1
; RV32-NEXT:    slli a0, a0, 16
; RV32-NEXT:    srli a0, a0, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    srli a0, a0, 1
; RV64-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; RV64-NEXT:    vmv.v.x v10, a0
; RV64-NEXT:    vid.v v11
; RV64-NEXT:    li a1, -1
; RV64-NEXT:    vmadd.vx v11, a1, v10
; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT:    vmsne.vi v0, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV64-NEXT:    vmv.v.i v8, 0
; RV64-NEXT:    vmerge.vvm v8, v8, v11, v0
; RV64-NEXT:    vredmaxu.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a1, v8
; RV64-NEXT:    subw a0, a0, a1
; RV64-NEXT:    slli a0, a0, 48
; RV64-NEXT:    srli a0, a0, 48
; RV64-NEXT:    ret
  %res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i32(<vscale x 4 x i32> %a, i1 0)
  ret i32 %res
}

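; Without a vscale_range bound the i64 element count cannot be narrowed: the
; index computation runs at e64/m8, and on RV32 the element count is produced
; by a __muldi3 libcall and broadcast from the stack with vlse64.v.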
define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
; RV32-LABEL: ctz_nxv8i1_no_range:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 1
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
; RV32-NEXT:    addi a0, sp, 32
; RV32-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    srli a0, a0, 3
; RV32-NEXT:    call __muldi3
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    sw a0, 16(sp)
; RV32-NEXT:    addi a2, sp, 16
; RV32-NEXT:    vsetvli a3, zero, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a2), zero
; RV32-NEXT:    vid.v v16
; RV32-NEXT:    li a2, -1
; RV32-NEXT:    vmadd.vx v16, a2, v8
; RV32-NEXT:    addi a2, sp, 32
; RV32-NEXT:    vl2r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vmsne.vi v0, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    vmerge.vim v8, v8, -1, v0
; RV32-NEXT:    vand.vv v8, v16, v8
; RV32-NEXT:    vredmaxu.vs v8, v8, v8
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    sltu a3, a0, a2
; RV32-NEXT:    li a4, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a4
; RV32-NEXT:    vmv.x.s a4, v8
; RV32-NEXT:    sub a1, a1, a4
; RV32-NEXT:    sub a1, a1, a3
; RV32-NEXT:    sub a0, a0, a2
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 1
; RV32-NEXT:    add sp, sp, a2
; RV32-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_nxv8i1_no_range:
; RV64:       # %bb.0:
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vmv.v.x v16, a0
; RV64-NEXT:    vid.v v24
; RV64-NEXT:    li a1, -1
; RV64-NEXT:    vmadd.vx v24, a1, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vmsne.vi v0, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vmv.v.i v8, 0
; RV64-NEXT:    vmerge.vvm v8, v8, v24, v0
; RV64-NEXT:    vredmaxu.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a1, v8
; RV64-NEXT:    sub a0, a0, a1
; RV64-NEXT:    ret
  %res = call i64 @llvm.experimental.cttz.elts.i64.nxv8i16(<vscale x 8 x i16> %a, i1 0)
  ret i64 %res
}

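; For i1 vectors, cttz.elts lowers to a single vfirst.m; when no element is
; set (negative result) the element count (vlenb * 2 for nxv16i1) is returned
; instead.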
define i32 @ctz_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
; RV32-LABEL: ctz_nxv16i1:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; RV32-NEXT:    vfirst.m a0, v8
; RV32-NEXT:    bgez a0, .LBB2_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 1
; RV32-NEXT:  .LBB2_2:
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_nxv16i1:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; RV64-NEXT:    vfirst.m a0, v8
; RV64-NEXT:    bgez a0, .LBB2_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 1
; RV64-NEXT:  .LBB2_2:
; RV64-NEXT:    ret
  %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 0)
  ret i32 %res
}

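; With zero_is_poison set, the fallback branch is not needed and the vfirst.m
; result is returned directly.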
define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
; RV32-LABEL: ctz_nxv16i1_poison:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; RV32-NEXT:    vfirst.m a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_nxv16i1_poison:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; RV64-NEXT:    vfirst.m a0, v8
; RV64-NEXT:    ret
  %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 1)
  ret i32 %res
}

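; Fixed-length variant: the element count is the immediate 16, so no vlenb
; read is needed in the fallback block.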
define i32 @ctz_v16i1(<16 x i1> %pg, <16 x i1> %a) {
; RV32-LABEL: ctz_v16i1:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT:    vfirst.m a0, v8
; RV32-NEXT:    bgez a0, .LBB4_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a0, 16
; RV32-NEXT:  .LBB4_2:
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_v16i1:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT:    vfirst.m a0, v8
; RV64-NEXT:    bgez a0, .LBB4_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a0, 16
; RV64-NEXT:  .LBB4_2:
; RV64-NEXT:    ret
  %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0)
  ret i32 %res
}

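; Fixed-length variant with zero_is_poison: just vfirst.m.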
define i32 @ctz_v16i1_poison(<16 x i1> %pg, <16 x i1> %a) {
; RV32-LABEL: ctz_v16i1_poison:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT:    vfirst.m a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_v16i1_poison:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT:    vfirst.m a0, v8
; RV64-NEXT:    ret
  %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 1)
  ret i32 %res
}

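; Same lowering with an i16 result; the mask operand arrives in v0.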
define i16 @ctz_v8i1_i16_ret(<8 x i1> %a) {
; RV32-LABEL: ctz_v8i1_i16_ret:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vfirst.m a0, v0
; RV32-NEXT:    bgez a0, .LBB6_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a0, 8
; RV32-NEXT:  .LBB6_2:
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_v8i1_i16_ret:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vfirst.m a0, v0
; RV64-NEXT:    bgez a0, .LBB6_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a0, 8
; RV64-NEXT:  .LBB6_2:
; RV64-NEXT:    ret
  %res = call i16 @llvm.experimental.cttz.elts.i16.v8i1(<8 x i1> %a, i1 0)
  ret i16 %res
}

declare i64 @llvm.experimental.cttz.elts.i64.nxv8i16(<vscale x 8 x i16>, i1)
declare i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1>, i1)
declare i32 @llvm.experimental.cttz.elts.i32.nxv4i32(<vscale x 4 x i32>, i1)
declare i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1>, i1)
declare i16 @llvm.experimental.cttz.elts.i16.v8i1(<8 x i1>, i1)

attributes #0 = { vscale_range(2,1024) }