; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
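
; These tests cover lowering of the llvm.vp.bitreverse intrinsic. Without
; Zvbb the operation is expanded into a shift-and-mask sequence that swaps
; nibbles, then bit pairs, then adjacent bits; with Zvbb it lowers to a
; single vbrev.v. For i8 elements the masks fit in a vand.vi immediate (15)
; or a single li (51 = 0x33, 85 = 0x55).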
declare <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @vp_bitreverse_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv1i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vand.vi v9, v8, 15, v0.t
; CHECK-NEXT: vsll.vi v9, v9, 4, v0.t
; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i8:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %v
}

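; The unmasked variants splat an all-true mask, which is folded away so the
; expansion uses unmasked instructions; the final shift-by-one is emitted as
; vadd.vv v8, v8, v8 (x + x == x << 1) instead of vsll.vi.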
define <vscale x 1 x i8> @vp_bitreverse_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv1i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vand.vi v9, v8, 15
; CHECK-NEXT: vsll.vi v9, v9, 4
; CHECK-NEXT: vsrl.vi v8, v8, 4
; CHECK-NEXT: vand.vi v8, v8, 15
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 2
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i8_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %v = call <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %v
}

declare <vscale x 2 x i8> @llvm.vp.bitreverse.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i8> @vp_bitreverse_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vand.vi v9, v8, 15, v0.t
; CHECK-NEXT: vsll.vi v9, v9, 4, v0.t
; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i8:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 2 x i8> @llvm.vp.bitreverse.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i8> %v
}

define <vscale x 2 x i8> @vp_bitreverse_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv2i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vand.vi v9, v8, 15
; CHECK-NEXT: vsll.vi v9, v9, 4
; CHECK-NEXT: vsrl.vi v8, v8, 4
; CHECK-NEXT: vand.vi v8, v8, 15
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 2
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i8_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %v = call <vscale x 2 x i8> @llvm.vp.bitreverse.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i8> %v
}

declare <vscale x 4 x i8> @llvm.vp.bitreverse.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i8> @vp_bitreverse_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vand.vi v9, v8, 15, v0.t
; CHECK-NEXT: vsll.vi v9, v9, 4, v0.t
; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i8:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 4 x i8> @llvm.vp.bitreverse.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i8> %v
}

define <vscale x 4 x i8> @vp_bitreverse_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv4i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vand.vi v9, v8, 15
; CHECK-NEXT: vsll.vi v9, v9, 4
; CHECK-NEXT: vsrl.vi v8, v8, 4
; CHECK-NEXT: vand.vi v8, v8, 15
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 2
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i8_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i8> @llvm.vp.bitreverse.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i8> %v
}

declare <vscale x 8 x i8> @llvm.vp.bitreverse.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i8> @vp_bitreverse_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vand.vi v9, v8, 15, v0.t
; CHECK-NEXT: vsll.vi v9, v9, 4, v0.t
; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i8:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 8 x i8> @llvm.vp.bitreverse.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i8> %v
}

define <vscale x 8 x i8> @vp_bitreverse_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv8i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vand.vi v9, v8, 15
; CHECK-NEXT: vsll.vi v9, v9, 4
; CHECK-NEXT: vsrl.vi v8, v8, 4
; CHECK-NEXT: vand.vi v8, v8, 15
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 2
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i8_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
  %v = call <vscale x 8 x i8> @llvm.vp.bitreverse.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i8> %v
}

declare <vscale x 16 x i8> @llvm.vp.bitreverse.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32)

define <vscale x 16 x i8> @vp_bitreverse_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vand.vi v10, v8, 15, v0.t
; CHECK-NEXT: vsll.vi v10, v10, 4, v0.t
; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i8:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 16 x i8> @llvm.vp.bitreverse.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x i8> %v
}

define <vscale x 16 x i8> @vp_bitreverse_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv16i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vand.vi v10, v8, 15
; CHECK-NEXT: vsll.vi v10, v10, 4
; CHECK-NEXT: vsrl.vi v8, v8, 4
; CHECK-NEXT: vand.vi v8, v8, 15
; CHECK-NEXT: vor.vv v8, v8, v10
; CHECK-NEXT: vsrl.vi v10, v8, 2
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vand.vx v10, v10, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 1
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vand.vx v10, v10, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i8_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
  %v = call <vscale x 16 x i8> @llvm.vp.bitreverse.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x i8> %v
}

declare <vscale x 32 x i8> @llvm.vp.bitreverse.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i1>, i32)

define <vscale x 32 x i8> @vp_bitreverse_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vand.vi v12, v8, 15, v0.t
; CHECK-NEXT: vsll.vi v12, v12, 4, v0.t
; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i8:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 32 x i8> @llvm.vp.bitreverse.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x i8> %v
}

define <vscale x 32 x i8> @vp_bitreverse_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv32i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vand.vi v12, v8, 15
; CHECK-NEXT: vsll.vi v12, v12, 4
; CHECK-NEXT: vsrl.vi v8, v8, 4
; CHECK-NEXT: vand.vi v8, v8, 15
; CHECK-NEXT: vor.vv v8, v8, v12
; CHECK-NEXT: vsrl.vi v12, v8, 2
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vand.vx v12, v12, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v12, v8
; CHECK-NEXT: vsrl.vi v12, v8, 1
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vand.vx v12, v12, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v12, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i8_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
  %v = call <vscale x 32 x i8> @llvm.vp.bitreverse.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x i8> %v
}

declare <vscale x 64 x i8> @llvm.vp.bitreverse.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i1>, i32)

define <vscale x 64 x i8> @vp_bitreverse_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv64i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vand.vi v16, v8, 15, v0.t
; CHECK-NEXT: vsll.vi v16, v16, 4, v0.t
; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i8:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 64 x i8> @llvm.vp.bitreverse.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 %evl)
  ret <vscale x 64 x i8> %v
}

define <vscale x 64 x i8> @vp_bitreverse_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv64i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vand.vi v16, v8, 15
; CHECK-NEXT: vsll.vi v16, v16, 4
; CHECK-NEXT: vsrl.vi v8, v8, 4
; CHECK-NEXT: vand.vi v8, v8, 15
; CHECK-NEXT: vor.vv v8, v8, v16
; CHECK-NEXT: vsrl.vi v16, v8, 2
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v16, v8
; CHECK-NEXT: vsrl.vi v16, v8, 1
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v16, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i8_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
  %v = call <vscale x 64 x i8> @llvm.vp.bitreverse.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 %evl)
  ret <vscale x 64 x i8> %v
}

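; For i16 elements a byte swap (shift by 8) precedes the in-byte swaps, and
; the masks no longer fit in immediates: lui/addi materialize 0x0f0f
; (lui 1, addi -241), 0x3333 (lui 3, addi 819) and 0x5555 (lui 5, addi 1365).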
declare <vscale x 1 x i16> @llvm.vp.bitreverse.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i16> @vp_bitreverse_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv1i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT: lui a0, 3
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 1 x i16> @llvm.vp.bitreverse.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i16> %v
}

define <vscale x 1 x i16> @vp_bitreverse_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv1i16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 8
; CHECK-NEXT: vsll.vi v8, v8, 8
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 4
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 2
; CHECK-NEXT: lui a0, 3
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %v = call <vscale x 1 x i16> @llvm.vp.bitreverse.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i16> %v
}

declare <vscale x 2 x i16> @llvm.vp.bitreverse.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i16> @vp_bitreverse_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT: lui a0, 3
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 2 x i16> @llvm.vp.bitreverse.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i16> %v
}

define <vscale x 2 x i16> @vp_bitreverse_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv2i16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 8
; CHECK-NEXT: vsll.vi v8, v8, 8
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 4
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 2
; CHECK-NEXT: lui a0, 3
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %v = call <vscale x 2 x i16> @llvm.vp.bitreverse.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i16> %v
}

declare <vscale x 4 x i16> @llvm.vp.bitreverse.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i16> @vp_bitreverse_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT: lui a0, 3
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 4 x i16> @llvm.vp.bitreverse.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i16> %v
}

define <vscale x 4 x i16> @vp_bitreverse_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv4i16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 8
; CHECK-NEXT: vsll.vi v8, v8, 8
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 4
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 2
; CHECK-NEXT: lui a0, 3
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i16> @llvm.vp.bitreverse.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i16> %v
}

declare <vscale x 8 x i16> @llvm.vp.bitreverse.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i16> @vp_bitreverse_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t
; CHECK-NEXT: lui a0, 3
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 8 x i16> @llvm.vp.bitreverse.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vp_bitreverse_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv8i16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vsrl.vi v10, v8, 8
; CHECK-NEXT: vsll.vi v8, v8, 8
; CHECK-NEXT: vor.vv v8, v8, v10
; CHECK-NEXT: vsrl.vi v10, v8, 4
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v10, v10, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 2
; CHECK-NEXT: lui a0, 3
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v10, v10, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 1
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v10, v10, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
  %v = call <vscale x 8 x i16> @llvm.vp.bitreverse.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

declare <vscale x 16 x i16> @llvm.vp.bitreverse.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)

define <vscale x 16 x i16> @vp_bitreverse_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t
; CHECK-NEXT: lui a0, 3
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 16 x i16> @llvm.vp.bitreverse.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x i16> %v
}

define <vscale x 16 x i16> @vp_bitreverse_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv16i16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vsrl.vi v12, v8, 8
; CHECK-NEXT: vsll.vi v8, v8, 8
; CHECK-NEXT: vor.vv v8, v8, v12
; CHECK-NEXT: vsrl.vi v12, v8, 4
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v12, v12, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v12, v8
; CHECK-NEXT: vsrl.vi v12, v8, 2
; CHECK-NEXT: lui a0, 3
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v12, v12, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v12, v8
; CHECK-NEXT: vsrl.vi v12, v8, 1
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v12, v12, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v12, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
  %v = call <vscale x 16 x i16> @llvm.vp.bitreverse.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x i16> %v
}

declare <vscale x 32 x i16> @llvm.vp.bitreverse.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i1>, i32)

define <vscale x 32 x i16> @vp_bitreverse_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv32i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
; CHECK-NEXT: lui a0, 3
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 32 x i16> @llvm.vp.bitreverse.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x i16> %v
}

define <vscale x 32 x i16> @vp_bitreverse_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv32i16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v16, v8, 8
; CHECK-NEXT: vsll.vi v8, v8, 8
; CHECK-NEXT: vor.vv v8, v8, v16
; CHECK-NEXT: vsrl.vi v16, v8, 4
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v16, v8
; CHECK-NEXT: vsrl.vi v16, v8, 2
; CHECK-NEXT: lui a0, 3
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v16, v8
; CHECK-NEXT: vsrl.vi v16, v8, 1
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v16, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
  %v = call <vscale x 32 x i16> @llvm.vp.bitreverse.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x i16> %v
}

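; For i32 elements the byte swap needs shifts by 8 and 24 plus the 0xff00
; mask (lui 16, addi -256); the bit-swap masks are 0x0f0f0f0f (lui 61681),
; 0x33333333 (lui 209715) and 0x55555555 (lui 349525).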
declare <vscale x 1 x i32> @llvm.vp.bitreverse.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i32> @vp_bitreverse_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv1i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: addi a0, a0, -256
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t
; CHECK-NEXT: vor.vv v9, v9, v10, v0.t
; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT: lui a0, 61681
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT: lui a0, 209715
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 1 x i32> @llvm.vp.bitreverse.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i32> %v
}

define <vscale x 1 x i32> @vp_bitreverse_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv1i32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 8
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: addi a0, a0, -256
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vsrl.vi v10, v8, 24
; CHECK-NEXT: vor.vv v9, v9, v10
; CHECK-NEXT: vand.vx v10, v8, a0
; CHECK-NEXT: vsll.vi v10, v10, 8
; CHECK-NEXT: vsll.vi v8, v8, 24
; CHECK-NEXT: vor.vv v8, v8, v10
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 4
; CHECK-NEXT: lui a0, 61681
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 2
; CHECK-NEXT: lui a0, 209715
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %v = call <vscale x 1 x i32> @llvm.vp.bitreverse.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i32> %v
}

declare <vscale x 2 x i32> @llvm.vp.bitreverse.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i32> @vp_bitreverse_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: addi a0, a0, -256
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t
; CHECK-NEXT: vor.vv v9, v9, v10, v0.t
; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT: lui a0, 61681
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT: lui a0, 209715
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 2 x i32> @llvm.vp.bitreverse.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %v
}

define <vscale x 2 x i32> @vp_bitreverse_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv2i32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 8
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: addi a0, a0, -256
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vsrl.vi v10, v8, 24
; CHECK-NEXT: vor.vv v9, v9, v10
; CHECK-NEXT: vand.vx v10, v8, a0
; CHECK-NEXT: vsll.vi v10, v10, 8
; CHECK-NEXT: vsll.vi v8, v8, 24
; CHECK-NEXT: vor.vv v8, v8, v10
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 4
; CHECK-NEXT: lui a0, 61681
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 2
; CHECK-NEXT: lui a0, 209715
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %v = call <vscale x 2 x i32> @llvm.vp.bitreverse.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %v
}

declare <vscale x 4 x i32> @llvm.vp.bitreverse.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i32> @vp_bitreverse_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: addi a0, a0, -256
; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t
; CHECK-NEXT: vor.vv v10, v10, v12, v0.t
; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v12, v12, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT: lui a0, 61681
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t
; CHECK-NEXT: lui a0, 209715
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 4 x i32> @llvm.vp.bitreverse.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i32> %v
}

define <vscale x 4 x i32> @vp_bitreverse_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv4i32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vsrl.vi v10, v8, 8
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: addi a0, a0, -256
; CHECK-NEXT: vand.vx v10, v10, a0
; CHECK-NEXT: vsrl.vi v12, v8, 24
; CHECK-NEXT: vor.vv v10, v10, v12
; CHECK-NEXT: vand.vx v12, v8, a0
; CHECK-NEXT: vsll.vi v12, v12, 8
; CHECK-NEXT: vsll.vi v8, v8, 24
; CHECK-NEXT: vor.vv v8, v8, v12
; CHECK-NEXT: vor.vv v8, v8, v10
; CHECK-NEXT: vsrl.vi v10, v8, 4
; CHECK-NEXT: lui a0, 61681
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v10, v10, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 2
; CHECK-NEXT: lui a0, 209715
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v10, v10, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 1
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v10, v10, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i32> @llvm.vp.bitreverse.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i32> %v
}

declare <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i32> @vp_bitreverse_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: addi a0, a0, -256
; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t
; CHECK-NEXT: vor.vv v12, v12, v16, v0.t
; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT: lui a0, 61681
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t
; CHECK-NEXT: lui a0, 209715
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vp_bitreverse_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv8i32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vsrl.vi v12, v8, 8
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: addi a0, a0, -256
; CHECK-NEXT: vand.vx v12, v12, a0
; CHECK-NEXT: vsrl.vi v16, v8, 24
; CHECK-NEXT: vor.vv v12, v12, v16
; CHECK-NEXT: vand.vx v16, v8, a0
; CHECK-NEXT: vsll.vi v16, v16, 8
; CHECK-NEXT: vsll.vi v8, v8, 24
; CHECK-NEXT: vor.vv v8, v8, v16
; CHECK-NEXT: vor.vv v8, v8, v12
; CHECK-NEXT: vsrl.vi v12, v8, 4
; CHECK-NEXT: lui a0, 61681
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v12, v12, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v12, v8
; CHECK-NEXT: vsrl.vi v12, v8, 2
; CHECK-NEXT: lui a0, 209715
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v12, v12, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v12, v8
; CHECK-NEXT: vsrl.vi v12, v8, 1
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v12, v12, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v12, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
  %v = call <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

1371 declare <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
1373 define <vscale x 16 x i32> @vp_bitreverse_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1374 ; CHECK-LABEL: vp_bitreverse_nxv16i32:
1376 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1377 ; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
1378 ; CHECK-NEXT: lui a0, 16
1379 ; CHECK-NEXT: addi a0, a0, -256
1380 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
1381 ; CHECK-NEXT: vsrl.vi v24, v8, 24, v0.t
1382 ; CHECK-NEXT: vor.vv v16, v16, v24, v0.t
1383 ; CHECK-NEXT: vand.vx v24, v8, a0, v0.t
1384 ; CHECK-NEXT: vsll.vi v24, v24, 8, v0.t
1385 ; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
1386 ; CHECK-NEXT: vor.vv v8, v8, v24, v0.t
1387 ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
1388 ; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
1389 ; CHECK-NEXT: lui a0, 61681
1390 ; CHECK-NEXT: addi a0, a0, -241
1391 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
1392 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1393 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
1394 ; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
1395 ; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
1396 ; CHECK-NEXT: lui a0, 209715
1397 ; CHECK-NEXT: addi a0, a0, 819
1398 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
1399 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1400 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
1401 ; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
1402 ; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
1403 ; CHECK-NEXT: lui a0, 349525
1404 ; CHECK-NEXT: addi a0, a0, 1365
1405 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
1406 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1407 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
1408 ; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
1411 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32:
1412 ; CHECK-ZVBB: # %bb.0:
1413 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1414 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
1415 ; CHECK-ZVBB-NEXT: ret
1416 %v = call <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
1417 ret <vscale x 16 x i32> %v

define <vscale x 16 x i32> @vp_bitreverse_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv16i32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vsrl.vi v16, v8, 8
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: addi a0, a0, -256
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: vsrl.vi v24, v8, 24
; CHECK-NEXT: vor.vv v16, v16, v24
; CHECK-NEXT: vand.vx v24, v8, a0
; CHECK-NEXT: vsll.vi v24, v24, 8
; CHECK-NEXT: vsll.vi v8, v8, 24
; CHECK-NEXT: vor.vv v8, v8, v24
; CHECK-NEXT: vor.vv v8, v8, v16
; CHECK-NEXT: vsrl.vi v16, v8, 4
; CHECK-NEXT: lui a0, 61681
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v16, v8
; CHECK-NEXT: vsrl.vi v16, v8, 2
; CHECK-NEXT: lui a0, 209715
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v16, v8
; CHECK-NEXT: vsrl.vi v16, v8, 1
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v16, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x i32> %v
}
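
; In the unmasked tails the final shift left by one is emitted as
; vadd.vv v8, v8, v8 (x + x == x << 1); the masked variants keep
; vsll.vi v8, v8, 1, v0.t instead.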

declare <vscale x 1 x i64> @llvm.vp.bitreverse.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i64> @vp_bitreverse_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_nxv1i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v9, v8, a1, v0.t
; RV32-NEXT: lui a2, 16
; RV32-NEXT: addi a2, a2, -256
; RV32-NEXT: vand.vx v10, v8, a2, v0.t
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v10, v10, a3, v0.t
; RV32-NEXT: vor.vv v9, v9, v10, v0.t
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v10, v8, a4, v0.t
; RV32-NEXT: vsll.vi v10, v10, 24, v0.t
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v11, (a5), zero
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v12, v8, v11, v0.t
; RV32-NEXT: vsll.vi v12, v12, 8, v0.t
; RV32-NEXT: vor.vv v10, v10, v12, v0.t
; RV32-NEXT: vor.vv v9, v9, v10, v0.t
; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t
; RV32-NEXT: vsrl.vx v12, v8, a3, v0.t
; RV32-NEXT: vand.vx v12, v12, a2, v0.t
; RV32-NEXT: vor.vv v10, v12, v10, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t
; RV32-NEXT: vand.vx v12, v12, a4, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT: vand.vv v8, v8, v11, v0.t
; RV32-NEXT: vor.vv v8, v8, v12, v0.t
; RV32-NEXT: vor.vv v8, v8, v10, v0.t
; RV32-NEXT: vor.vv v8, v9, v8, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10, v0.t
; RV32-NEXT: vand.vv v8, v8, v10, v0.t
; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
; RV32-NEXT: vor.vv v8, v9, v8, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10, v0.t
; RV32-NEXT: vand.vv v8, v8, v10, v0.t
; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
; RV32-NEXT: vor.vv v8, v9, v8, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10, v0.t
; RV32-NEXT: vand.vv v8, v8, v10, v0.t
; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v9, v8, v0.t
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_nxv1i64:
; RV64: # %bb.0:
; RV64-NEXT: lui a1, 4080
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT: vand.vx v9, v8, a1, v0.t
; RV64-NEXT: vsll.vi v9, v9, 24, v0.t
; RV64-NEXT: li a0, 255
; RV64-NEXT: slli a0, a0, 24
; RV64-NEXT: vand.vx v10, v8, a0, v0.t
; RV64-NEXT: vsll.vi v10, v10, 8, v0.t
; RV64-NEXT: vor.vv v9, v9, v10, v0.t
; RV64-NEXT: li a2, 56
; RV64-NEXT: vsll.vx v10, v8, a2, v0.t
; RV64-NEXT: lui a3, 16
; RV64-NEXT: addiw a3, a3, -256
; RV64-NEXT: vand.vx v11, v8, a3, v0.t
; RV64-NEXT: li a4, 40
; RV64-NEXT: vsll.vx v11, v11, a4, v0.t
; RV64-NEXT: vor.vv v10, v10, v11, v0.t
; RV64-NEXT: vor.vv v9, v10, v9, v0.t
; RV64-NEXT: vsrl.vx v10, v8, a2, v0.t
; RV64-NEXT: vsrl.vx v11, v8, a4, v0.t
; RV64-NEXT: vand.vx v11, v11, a3, v0.t
; RV64-NEXT: vor.vv v10, v11, v10, v0.t
; RV64-NEXT: vsrl.vi v11, v8, 24, v0.t
; RV64-NEXT: vand.vx v11, v11, a1, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vor.vv v8, v8, v11, v0.t
; RV64-NEXT: vor.vv v8, v8, v10, v0.t
; RV64-NEXT: vor.vv v8, v9, v8, v0.t
; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v9, v9, a0, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
; RV64-NEXT: vor.vv v8, v9, v8, v0.t
; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v9, v9, a0, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
; RV64-NEXT: vor.vv v8, v9, v8, v0.t
; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v9, v9, a0, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
; RV64-NEXT: vor.vv v8, v9, v8, v0.t
; RV64-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i64:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 1 x i64> @llvm.vp.bitreverse.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
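
; For e64 elements the two targets build the wide constants differently.
; RV32 has no 64-bit scalar registers: it stores the two halves of the
; 0x00000000FF000000 byte mask to the stack (sw zero / sw 0xFF000000, i.e.
; lui 1044480) and broadcasts it with a zero-stride vlse64.v, and it splats
; the 32-bit swap masks at e32 with vmv.v.x so the same pattern lands in
; both halves of each e64 element. RV64 materializes each mask in a scalar
; instead, e.g. lui/addiw gives 0x0F0F0F0F and slli 32 plus add replicates
; it to 0x0F0F0F0F0F0F0F0F.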

define <vscale x 1 x i64> @vp_bitreverse_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_nxv1i64_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v9, v8, a1
; RV32-NEXT: lui a2, 16
; RV32-NEXT: addi a2, a2, -256
; RV32-NEXT: vand.vx v10, v8, a2
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v10, v10, a3
; RV32-NEXT: vor.vv v9, v9, v10
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v10, v8, a4
; RV32-NEXT: vsll.vi v10, v10, 24
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v11, (a5), zero
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v12, v8, v11
; RV32-NEXT: vsll.vi v12, v12, 8
; RV32-NEXT: vor.vv v10, v10, v12
; RV32-NEXT: vor.vv v9, v9, v10
; RV32-NEXT: vsrl.vx v10, v8, a1
; RV32-NEXT: vsrl.vx v12, v8, a3
; RV32-NEXT: vand.vx v12, v12, a2
; RV32-NEXT: vor.vv v10, v12, v10
; RV32-NEXT: vsrl.vi v12, v8, 24
; RV32-NEXT: vand.vx v12, v12, a4
; RV32-NEXT: vsrl.vi v8, v8, 8
; RV32-NEXT: vand.vv v8, v8, v11
; RV32-NEXT: vor.vv v8, v8, v12
; RV32-NEXT: vor.vv v8, v8, v10
; RV32-NEXT: vor.vv v8, v9, v8
; RV32-NEXT: vsrl.vi v9, v8, 4
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10
; RV32-NEXT: vand.vv v8, v8, v10
; RV32-NEXT: vsll.vi v8, v8, 4
; RV32-NEXT: vor.vv v8, v9, v8
; RV32-NEXT: vsrl.vi v9, v8, 2
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10
; RV32-NEXT: vand.vv v8, v8, v10
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vor.vv v8, v9, v8
; RV32-NEXT: vsrl.vi v9, v8, 1
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v9, v10
; RV32-NEXT: vand.vv v8, v8, v10
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vor.vv v8, v9, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_nxv1i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: lui a1, 4080
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT: vand.vx v9, v8, a1
; RV64-NEXT: vsll.vi v9, v9, 24
; RV64-NEXT: li a0, 255
; RV64-NEXT: slli a0, a0, 24
; RV64-NEXT: vand.vx v10, v8, a0
; RV64-NEXT: vsll.vi v10, v10, 8
; RV64-NEXT: vor.vv v9, v9, v10
; RV64-NEXT: li a2, 56
; RV64-NEXT: vsll.vx v10, v8, a2
; RV64-NEXT: lui a3, 16
; RV64-NEXT: addiw a3, a3, -256
; RV64-NEXT: vand.vx v11, v8, a3
; RV64-NEXT: li a4, 40
; RV64-NEXT: vsll.vx v11, v11, a4
; RV64-NEXT: vor.vv v10, v10, v11
; RV64-NEXT: vor.vv v9, v10, v9
; RV64-NEXT: vsrl.vx v10, v8, a2
; RV64-NEXT: vsrl.vx v11, v8, a4
; RV64-NEXT: vand.vx v11, v11, a3
; RV64-NEXT: vor.vv v10, v11, v10
; RV64-NEXT: vsrl.vi v11, v8, 24
; RV64-NEXT: vand.vx v11, v11, a1
; RV64-NEXT: vsrl.vi v8, v8, 8
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vor.vv v8, v8, v11
; RV64-NEXT: vor.vv v8, v8, v10
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vsrl.vi v9, v8, 4
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v9, v9, a0
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vsll.vi v8, v8, 4
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vsrl.vi v9, v8, 2
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v9, v9, a0
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vsll.vi v8, v8, 2
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vsrl.vi v9, v8, 1
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v9, v9, a0
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vadd.vv v8, v8, v8
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i64_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x i64> @llvm.vp.bitreverse.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}

declare <vscale x 2 x i64> @llvm.vp.bitreverse.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i64> @vp_bitreverse_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v10, v8, a1, v0.t
; RV32-NEXT: lui a2, 16
; RV32-NEXT: addi a2, a2, -256
; RV32-NEXT: vand.vx v12, v8, a2, v0.t
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v12, v12, a3, v0.t
; RV32-NEXT: vor.vv v10, v10, v12, v0.t
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v12, v8, a4, v0.t
; RV32-NEXT: vsll.vi v12, v12, 24, v0.t
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v14, (a5), zero
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v16, v8, v14, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
; RV32-NEXT: vor.vv v12, v12, v16, v0.t
; RV32-NEXT: vor.vv v10, v10, v12, v0.t
; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t
; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t
; RV32-NEXT: vand.vx v16, v16, a2, v0.t
; RV32-NEXT: vor.vv v12, v16, v12, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t
; RV32-NEXT: vand.vx v16, v16, a4, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT: vand.vv v8, v8, v14, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vor.vv v8, v8, v12, v0.t
; RV32-NEXT: vor.vv v8, v10, v8, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12, v0.t
; RV32-NEXT: vand.vv v8, v8, v12, v0.t
; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
; RV32-NEXT: vor.vv v8, v10, v8, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12, v0.t
; RV32-NEXT: vand.vv v8, v8, v12, v0.t
; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
; RV32-NEXT: vor.vv v8, v10, v8, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12, v0.t
; RV32-NEXT: vand.vv v8, v8, v12, v0.t
; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v10, v8, v0.t
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_nxv2i64:
; RV64: # %bb.0:
; RV64-NEXT: lui a1, 4080
; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT: vand.vx v10, v8, a1, v0.t
; RV64-NEXT: vsll.vi v10, v10, 24, v0.t
; RV64-NEXT: li a0, 255
; RV64-NEXT: slli a0, a0, 24
; RV64-NEXT: vand.vx v12, v8, a0, v0.t
; RV64-NEXT: vsll.vi v12, v12, 8, v0.t
; RV64-NEXT: vor.vv v10, v10, v12, v0.t
; RV64-NEXT: li a2, 56
; RV64-NEXT: vsll.vx v12, v8, a2, v0.t
; RV64-NEXT: lui a3, 16
; RV64-NEXT: addiw a3, a3, -256
; RV64-NEXT: vand.vx v14, v8, a3, v0.t
; RV64-NEXT: li a4, 40
; RV64-NEXT: vsll.vx v14, v14, a4, v0.t
; RV64-NEXT: vor.vv v12, v12, v14, v0.t
; RV64-NEXT: vor.vv v10, v12, v10, v0.t
; RV64-NEXT: vsrl.vx v12, v8, a2, v0.t
; RV64-NEXT: vsrl.vx v14, v8, a4, v0.t
; RV64-NEXT: vand.vx v14, v14, a3, v0.t
; RV64-NEXT: vor.vv v12, v14, v12, v0.t
; RV64-NEXT: vsrl.vi v14, v8, 24, v0.t
; RV64-NEXT: vand.vx v14, v14, a1, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vor.vv v8, v8, v14, v0.t
; RV64-NEXT: vor.vv v8, v8, v12, v0.t
; RV64-NEXT: vor.vv v8, v10, v8, v0.t
; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v10, v10, a0, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
; RV64-NEXT: vor.vv v8, v10, v8, v0.t
; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v10, v10, a0, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
; RV64-NEXT: vor.vv v8, v10, v8, v0.t
; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v10, v10, a0, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
; RV64-NEXT: vor.vv v8, v10, v8, v0.t
; RV64-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i64:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.bitreverse.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i64> %v
}

define <vscale x 2 x i64> @vp_bitreverse_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_nxv2i64_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v10, v8, a1
; RV32-NEXT: lui a2, 16
; RV32-NEXT: addi a2, a2, -256
; RV32-NEXT: vand.vx v12, v8, a2
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v12, v12, a3
; RV32-NEXT: vor.vv v10, v10, v12
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v12, v8, a4
; RV32-NEXT: vsll.vi v12, v12, 24
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v14, (a5), zero
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v16, v8, v14
; RV32-NEXT: vsll.vi v16, v16, 8
; RV32-NEXT: vor.vv v12, v12, v16
; RV32-NEXT: vor.vv v10, v10, v12
; RV32-NEXT: vsrl.vx v12, v8, a1
; RV32-NEXT: vsrl.vx v16, v8, a3
; RV32-NEXT: vand.vx v16, v16, a2
; RV32-NEXT: vor.vv v12, v16, v12
; RV32-NEXT: vsrl.vi v16, v8, 24
; RV32-NEXT: vand.vx v16, v16, a4
; RV32-NEXT: vsrl.vi v8, v8, 8
; RV32-NEXT: vand.vv v8, v8, v14
; RV32-NEXT: vor.vv v8, v8, v16
; RV32-NEXT: vor.vv v8, v8, v12
; RV32-NEXT: vor.vv v8, v10, v8
; RV32-NEXT: vsrl.vi v10, v8, 4
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vand.vv v8, v8, v12
; RV32-NEXT: vsll.vi v8, v8, 4
; RV32-NEXT: vor.vv v8, v10, v8
; RV32-NEXT: vsrl.vi v10, v8, 2
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vand.vv v8, v8, v12
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vor.vv v8, v10, v8
; RV32-NEXT: vsrl.vi v10, v8, 1
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vand.vv v8, v8, v12
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vor.vv v8, v10, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_nxv2i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: lui a1, 4080
; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT: vand.vx v10, v8, a1
; RV64-NEXT: vsll.vi v10, v10, 24
; RV64-NEXT: li a0, 255
; RV64-NEXT: slli a0, a0, 24
; RV64-NEXT: vand.vx v12, v8, a0
; RV64-NEXT: vsll.vi v12, v12, 8
; RV64-NEXT: vor.vv v10, v10, v12
; RV64-NEXT: li a2, 56
; RV64-NEXT: vsll.vx v12, v8, a2
; RV64-NEXT: lui a3, 16
; RV64-NEXT: addiw a3, a3, -256
; RV64-NEXT: vand.vx v14, v8, a3
; RV64-NEXT: li a4, 40
; RV64-NEXT: vsll.vx v14, v14, a4
; RV64-NEXT: vor.vv v12, v12, v14
; RV64-NEXT: vor.vv v10, v12, v10
; RV64-NEXT: vsrl.vx v12, v8, a2
; RV64-NEXT: vsrl.vx v14, v8, a4
; RV64-NEXT: vand.vx v14, v14, a3
; RV64-NEXT: vor.vv v12, v14, v12
; RV64-NEXT: vsrl.vi v14, v8, 24
; RV64-NEXT: vand.vx v14, v14, a1
; RV64-NEXT: vsrl.vi v8, v8, 8
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vor.vv v8, v8, v14
; RV64-NEXT: vor.vv v8, v8, v12
; RV64-NEXT: vor.vv v8, v10, v8
; RV64-NEXT: vsrl.vi v10, v8, 4
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v10, v10, a0
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vsll.vi v8, v8, 4
; RV64-NEXT: vor.vv v8, v10, v8
; RV64-NEXT: vsrl.vi v10, v8, 2
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v10, v10, a0
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vsll.vi v8, v8, 2
; RV64-NEXT: vor.vv v8, v10, v8
; RV64-NEXT: vsrl.vi v10, v8, 1
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v10, v10, a0
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vadd.vv v8, v8, v8
; RV64-NEXT: vor.vv v8, v10, v8
; RV64-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i64_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x i64> @llvm.vp.bitreverse.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i64> %v
}
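
; The nxv1i64, nxv2i64, and nxv4i64 expansions are the same instruction
; sequence at LMUL=1, 2, and 4; only the register-group strides change
; (temporaries v9/v10/v11/v12 at m1 become v10/v12/v14/v16 at m2 and
; v12/v16/v20/v24 at m4).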

declare <vscale x 4 x i64> @llvm.vp.bitreverse.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i64> @vp_bitreverse_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_nxv4i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v12, v8, a1, v0.t
; RV32-NEXT: lui a2, 16
; RV32-NEXT: addi a2, a2, -256
; RV32-NEXT: vand.vx v16, v8, a2, v0.t
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v16, v16, a3, v0.t
; RV32-NEXT: vor.vv v16, v12, v16, v0.t
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v12, v8, a4, v0.t
; RV32-NEXT: vsll.vi v20, v12, 24, v0.t
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a5), zero
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v24, v8, v12, v0.t
; RV32-NEXT: vsll.vi v24, v24, 8, v0.t
; RV32-NEXT: vor.vv v20, v20, v24, v0.t
; RV32-NEXT: vor.vv v16, v16, v20, v0.t
; RV32-NEXT: vsrl.vx v20, v8, a1, v0.t
; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t
; RV32-NEXT: vand.vx v24, v24, a2, v0.t
; RV32-NEXT: vor.vv v20, v24, v20, v0.t
; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
; RV32-NEXT: vand.vx v24, v24, a4, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT: vand.vv v8, v8, v12, v0.t
; RV32-NEXT: vor.vv v8, v8, v24, v0.t
; RV32-NEXT: vor.vv v8, v8, v20, v0.t
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16, v0.t
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
; RV32-NEXT: vor.vv v8, v12, v8, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16, v0.t
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
; RV32-NEXT: vor.vv v8, v12, v8, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16, v0.t
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v12, v8, v0.t
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_nxv4i64:
; RV64: # %bb.0:
; RV64-NEXT: lui a1, 4080
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vand.vx v12, v8, a1, v0.t
; RV64-NEXT: vsll.vi v12, v12, 24, v0.t
; RV64-NEXT: li a0, 255
; RV64-NEXT: slli a0, a0, 24
; RV64-NEXT: vand.vx v16, v8, a0, v0.t
; RV64-NEXT: vsll.vi v16, v16, 8, v0.t
; RV64-NEXT: vor.vv v12, v12, v16, v0.t
; RV64-NEXT: li a2, 56
; RV64-NEXT: vsll.vx v16, v8, a2, v0.t
; RV64-NEXT: lui a3, 16
; RV64-NEXT: addiw a3, a3, -256
; RV64-NEXT: vand.vx v20, v8, a3, v0.t
; RV64-NEXT: li a4, 40
; RV64-NEXT: vsll.vx v20, v20, a4, v0.t
; RV64-NEXT: vor.vv v16, v16, v20, v0.t
; RV64-NEXT: vor.vv v12, v16, v12, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t
; RV64-NEXT: vsrl.vx v20, v8, a4, v0.t
; RV64-NEXT: vand.vx v20, v20, a3, v0.t
; RV64-NEXT: vor.vv v16, v20, v16, v0.t
; RV64-NEXT: vsrl.vi v20, v8, 24, v0.t
; RV64-NEXT: vand.vx v20, v20, a1, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vor.vv v8, v8, v20, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vor.vv v8, v12, v8, v0.t
; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v12, v12, a0, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
; RV64-NEXT: vor.vv v8, v12, v8, v0.t
; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v12, v12, a0, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
; RV64-NEXT: vor.vv v8, v12, v8, v0.t
; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v12, v12, a0, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
; RV64-NEXT: vor.vv v8, v12, v8, v0.t
; RV64-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i64:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 4 x i64> @llvm.vp.bitreverse.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i64> %v
}

define <vscale x 4 x i64> @vp_bitreverse_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_nxv4i64_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v12, v8, a1
; RV32-NEXT: lui a2, 16
; RV32-NEXT: addi a2, a2, -256
; RV32-NEXT: vand.vx v16, v8, a2
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v16, v16, a3
; RV32-NEXT: vor.vv v12, v12, v16
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4
; RV32-NEXT: vsll.vi v16, v16, 24
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v20, (a5), zero
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v24, v8, v20
; RV32-NEXT: vsll.vi v24, v24, 8
; RV32-NEXT: vor.vv v16, v16, v24
; RV32-NEXT: vor.vv v12, v12, v16
; RV32-NEXT: vsrl.vx v16, v8, a1
; RV32-NEXT: vsrl.vx v24, v8, a3
; RV32-NEXT: vand.vx v24, v24, a2
; RV32-NEXT: vor.vv v16, v24, v16
; RV32-NEXT: vsrl.vi v24, v8, 24
; RV32-NEXT: vand.vx v24, v24, a4
; RV32-NEXT: vsrl.vi v8, v8, 8
; RV32-NEXT: vand.vv v8, v8, v20
; RV32-NEXT: vor.vv v8, v8, v24
; RV32-NEXT: vor.vv v8, v8, v16
; RV32-NEXT: vor.vv v8, v12, v8
; RV32-NEXT: vsrl.vi v12, v8, 4
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vsll.vi v8, v8, 4
; RV32-NEXT: vor.vv v8, v12, v8
; RV32-NEXT: vsrl.vi v12, v8, 2
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vor.vv v8, v12, v8
; RV32-NEXT: vsrl.vi v12, v8, 1
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v12, v12, v16
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vor.vv v8, v12, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_nxv4i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: lui a1, 4080
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vand.vx v12, v8, a1
; RV64-NEXT: vsll.vi v12, v12, 24
; RV64-NEXT: li a0, 255
; RV64-NEXT: slli a0, a0, 24
; RV64-NEXT: vand.vx v16, v8, a0
; RV64-NEXT: vsll.vi v16, v16, 8
; RV64-NEXT: vor.vv v12, v12, v16
; RV64-NEXT: li a2, 56
; RV64-NEXT: vsll.vx v16, v8, a2
; RV64-NEXT: lui a3, 16
; RV64-NEXT: addiw a3, a3, -256
; RV64-NEXT: vand.vx v20, v8, a3
; RV64-NEXT: li a4, 40
; RV64-NEXT: vsll.vx v20, v20, a4
; RV64-NEXT: vor.vv v16, v16, v20
; RV64-NEXT: vor.vv v12, v16, v12
; RV64-NEXT: vsrl.vx v16, v8, a2
; RV64-NEXT: vsrl.vx v20, v8, a4
; RV64-NEXT: vand.vx v20, v20, a3
; RV64-NEXT: vor.vv v16, v20, v16
; RV64-NEXT: vsrl.vi v20, v8, 24
; RV64-NEXT: vand.vx v20, v20, a1
; RV64-NEXT: vsrl.vi v8, v8, 8
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vor.vv v8, v8, v20
; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: vor.vv v8, v12, v8
; RV64-NEXT: vsrl.vi v12, v8, 4
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v12, v12, a0
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vsll.vi v8, v8, 4
; RV64-NEXT: vor.vv v8, v12, v8
; RV64-NEXT: vsrl.vi v12, v8, 2
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v12, v12, a0
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vsll.vi v8, v8, 2
; RV64-NEXT: vor.vv v8, v12, v8
; RV64-NEXT: vsrl.vi v12, v8, 1
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v12, v12, a0
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vadd.vv v8, v8, v8
; RV64-NEXT: vor.vv v8, v12, v8
; RV64-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i64_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x i64> @llvm.vp.bitreverse.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i64> %v
}
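
; From here the element count forces LMUL=8, so the expansion runs out of
; vector registers: whole register groups are spilled with vs8r.v/vl8r.v
; into a stack area scaled by vlenb (csrr vlenb; slli/mul), and the
; .cfi_escape lines encode the resulting frame size, e.g. sp + 16 + 24 *
; vlenb. nxv7i64 is not a power-of-two type and appears to be lowered the
; same way as nxv8i64 at m8.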

declare <vscale x 7 x i64> @llvm.vp.bitreverse.nxv7i64(<vscale x 7 x i64>, <vscale x 7 x i1>, i32)

define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_nxv7i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 24
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1, v0.t
; RV32-NEXT: lui a2, 16
; RV32-NEXT: addi a2, a2, -256
; RV32-NEXT: vand.vx v24, v8, a2, v0.t
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 4
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 3
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t
; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t
; RV32-NEXT: vand.vx v16, v16, a2, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
; RV32-NEXT: vand.vx v24, v24, a4, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: vor.vv v8, v8, v24, v0.t
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_nxv7i64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT: lui a1, 4080
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vand.vx v16, v8, a1, v0.t
; RV64-NEXT: vsll.vi v16, v16, 24, v0.t
; RV64-NEXT: li a0, 255
; RV64-NEXT: slli a0, a0, 24
; RV64-NEXT: vand.vx v24, v8, a0, v0.t
; RV64-NEXT: vsll.vi v24, v24, 8, v0.t
; RV64-NEXT: vor.vv v16, v16, v24, v0.t
; RV64-NEXT: addi a2, sp, 16
; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV64-NEXT: li a2, 56
; RV64-NEXT: vsll.vx v24, v8, a2, v0.t
; RV64-NEXT: lui a3, 16
; RV64-NEXT: addiw a3, a3, -256
; RV64-NEXT: li a4, 40
; RV64-NEXT: vand.vx v16, v8, a3, v0.t
; RV64-NEXT: vsll.vx v16, v16, a4, v0.t
; RV64-NEXT: vor.vv v16, v24, v16, v0.t
; RV64-NEXT: addi a5, sp, 16
; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV64-NEXT: vor.vv v16, v16, v24, v0.t
; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t
; RV64-NEXT: vand.vx v16, v16, a3, v0.t
; RV64-NEXT: vor.vv v24, v16, v24, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t
; RV64-NEXT: vand.vx v16, v16, a1, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vor.vv v8, v8, v24, v0.t
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v16, v16, a0, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v16, v16, a0, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v16, v16, a0, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv7i64:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 7 x i64> @llvm.vp.bitreverse.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl)
ret <vscale x 7 x i64> %v
}

define <vscale x 7 x i64> @vp_bitreverse_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_nxv7i64_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1
; RV32-NEXT: lui a2, 16
; RV32-NEXT: addi a2, a2, -256
; RV32-NEXT: vand.vx v24, v8, a2
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3
; RV32-NEXT: vor.vv v16, v16, v24
; RV32-NEXT: addi a4, sp, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4
; RV32-NEXT: vsll.vi v0, v16, 24
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v24, v8, v16
; RV32-NEXT: vsll.vi v24, v24, 8
; RV32-NEXT: vor.vv v24, v0, v24
; RV32-NEXT: addi a5, sp, 16
; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v24, v0, v24
; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vx v0, v8, a3
; RV32-NEXT: vand.vx v0, v0, a2
; RV32-NEXT: vsrl.vx v24, v8, a1
; RV32-NEXT: vor.vv v24, v0, v24
; RV32-NEXT: vsrl.vi v0, v8, 8
; RV32-NEXT: vand.vv v16, v0, v16
; RV32-NEXT: vsrl.vi v8, v8, 24
; RV32-NEXT: vand.vx v8, v8, a4
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: vor.vv v8, v8, v24
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v16, v8, 4
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vand.vv v8, v8, v24
; RV32-NEXT: vsll.vi v8, v8, 4
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v16, v8, 2
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vand.vv v8, v8, v24
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vand.vv v8, v8, v24
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_nxv7i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: lui a1, 4080
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vand.vx v16, v8, a1
; RV64-NEXT: vsll.vi v16, v16, 24
; RV64-NEXT: li a0, 255
; RV64-NEXT: slli a0, a0, 24
; RV64-NEXT: vand.vx v24, v8, a0
; RV64-NEXT: vsll.vi v24, v24, 8
; RV64-NEXT: vor.vv v16, v16, v24
; RV64-NEXT: li a2, 56
; RV64-NEXT: vsll.vx v24, v8, a2
; RV64-NEXT: lui a3, 16
; RV64-NEXT: addiw a3, a3, -256
; RV64-NEXT: vand.vx v0, v8, a3
; RV64-NEXT: li a4, 40
; RV64-NEXT: vsll.vx v0, v0, a4
; RV64-NEXT: vor.vv v24, v24, v0
; RV64-NEXT: vor.vv v16, v24, v16
; RV64-NEXT: vsrl.vx v24, v8, a2
; RV64-NEXT: vsrl.vx v0, v8, a4
; RV64-NEXT: vand.vx v0, v0, a3
; RV64-NEXT: vor.vv v24, v0, v24
; RV64-NEXT: vsrl.vi v0, v8, 24
; RV64-NEXT: vand.vx v0, v0, a1
; RV64-NEXT: vsrl.vi v8, v8, 8
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vor.vv v8, v8, v0
; RV64-NEXT: vor.vv v8, v8, v24
; RV64-NEXT: vor.vv v8, v16, v8
; RV64-NEXT: vsrl.vi v16, v8, 4
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v16, v16, a0
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vsll.vi v8, v8, 4
; RV64-NEXT: vor.vv v8, v16, v8
; RV64-NEXT: vsrl.vi v16, v8, 2
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v16, v16, a0
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vsll.vi v8, v8, 2
; RV64-NEXT: vor.vv v8, v16, v8
; RV64-NEXT: vsrl.vi v16, v8, 1
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v16, v16, a0
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vadd.vv v8, v8, v8
; RV64-NEXT: vor.vv v8, v16, v8
; RV64-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv7i64_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
%v = call <vscale x 7 x i64> @llvm.vp.bitreverse.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl)
ret <vscale x 7 x i64> %v
}
2687 declare <vscale x 8 x i64> @llvm.vp.bitreverse.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
2689 define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
2690 ; RV32-LABEL: vp_bitreverse_nxv8i64:
2692 ; RV32-NEXT: addi sp, sp, -16
2693 ; RV32-NEXT: .cfi_def_cfa_offset 16
2694 ; RV32-NEXT: csrr a1, vlenb
2695 ; RV32-NEXT: li a2, 24
2696 ; RV32-NEXT: mul a1, a1, a2
2697 ; RV32-NEXT: sub sp, sp, a1
2698 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
2699 ; RV32-NEXT: sw zero, 12(sp)
2700 ; RV32-NEXT: lui a1, 1044480
2701 ; RV32-NEXT: sw a1, 8(sp)
2702 ; RV32-NEXT: li a1, 56
2703 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2704 ; RV32-NEXT: vsll.vx v16, v8, a1, v0.t
2705 ; RV32-NEXT: lui a2, 16
2706 ; RV32-NEXT: addi a2, a2, -256
2707 ; RV32-NEXT: vand.vx v24, v8, a2, v0.t
2708 ; RV32-NEXT: li a3, 40
2709 ; RV32-NEXT: vsll.vx v24, v24, a3, v0.t
2710 ; RV32-NEXT: vor.vv v16, v16, v24, v0.t
2711 ; RV32-NEXT: csrr a4, vlenb
2712 ; RV32-NEXT: slli a4, a4, 4
2713 ; RV32-NEXT: add a4, sp, a4
2714 ; RV32-NEXT: addi a4, a4, 16
2715 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
2716 ; RV32-NEXT: lui a4, 4080
2717 ; RV32-NEXT: vand.vx v16, v8, a4, v0.t
2718 ; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
2719 ; RV32-NEXT: addi a5, sp, 8
2720 ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma
2721 ; RV32-NEXT: vlse64.v v16, (a5), zero
2722 ; RV32-NEXT: csrr a5, vlenb
2723 ; RV32-NEXT: slli a5, a5, 3
2724 ; RV32-NEXT: add a5, sp, a5
2725 ; RV32-NEXT: addi a5, a5, 16
2726 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
2727 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2728 ; RV32-NEXT: vand.vv v16, v8, v16, v0.t
2729 ; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
2730 ; RV32-NEXT: vor.vv v16, v24, v16, v0.t
2731 ; RV32-NEXT: csrr a5, vlenb
2732 ; RV32-NEXT: slli a5, a5, 4
2733 ; RV32-NEXT: add a5, sp, a5
2734 ; RV32-NEXT: addi a5, a5, 16
2735 ; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
2736 ; RV32-NEXT: vor.vv v16, v24, v16, v0.t
2737 ; RV32-NEXT: csrr a5, vlenb
2738 ; RV32-NEXT: slli a5, a5, 4
2739 ; RV32-NEXT: add a5, sp, a5
2740 ; RV32-NEXT: addi a5, a5, 16
2741 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
2742 ; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t
2743 ; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t
2744 ; RV32-NEXT: vand.vx v16, v16, a2, v0.t
2745 ; RV32-NEXT: vor.vv v16, v16, v24, v0.t
2746 ; RV32-NEXT: addi a1, sp, 16
2747 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
2748 ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
2749 ; RV32-NEXT: vand.vx v24, v24, a4, v0.t
2750 ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
2751 ; RV32-NEXT: csrr a1, vlenb
2752 ; RV32-NEXT: slli a1, a1, 3
2753 ; RV32-NEXT: add a1, sp, a1
2754 ; RV32-NEXT: addi a1, a1, 16
2755 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
2756 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
2757 ; RV32-NEXT: vor.vv v8, v8, v24, v0.t
2758 ; RV32-NEXT: addi a1, sp, 16
2759 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
2760 ; RV32-NEXT: vor.vv v8, v8, v16, v0.t
2761 ; RV32-NEXT: csrr a1, vlenb
2762 ; RV32-NEXT: slli a1, a1, 4
2763 ; RV32-NEXT: add a1, sp, a1
2764 ; RV32-NEXT: addi a1, a1, 16
2765 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
2766 ; RV32-NEXT: vor.vv v8, v16, v8, v0.t
2767 ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
2768 ; RV32-NEXT: lui a1, 61681
2769 ; RV32-NEXT: addi a1, a1, -241
2770 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
2771 ; RV32-NEXT: vmv.v.x v24, a1
2772 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2773 ; RV32-NEXT: vand.vv v16, v16, v24, v0.t
2774 ; RV32-NEXT: vand.vv v8, v8, v24, v0.t
2775 ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
2776 ; RV32-NEXT: vor.vv v8, v16, v8, v0.t
2777 ; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
2778 ; RV32-NEXT: lui a1, 209715
2779 ; RV32-NEXT: addi a1, a1, 819
2780 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
2781 ; RV32-NEXT: vmv.v.x v24, a1
2782 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2783 ; RV32-NEXT: vand.vv v16, v16, v24, v0.t
2784 ; RV32-NEXT: vand.vv v8, v8, v24, v0.t
2785 ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
2786 ; RV32-NEXT: vor.vv v8, v16, v8, v0.t
2787 ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
2788 ; RV32-NEXT: lui a1, 349525
2789 ; RV32-NEXT: addi a1, a1, 1365
2790 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
2791 ; RV32-NEXT: vmv.v.x v24, a1
2792 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2793 ; RV32-NEXT: vand.vv v16, v16, v24, v0.t
2794 ; RV32-NEXT: vand.vv v8, v8, v24, v0.t
2795 ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
2796 ; RV32-NEXT: vor.vv v8, v16, v8, v0.t
2797 ; RV32-NEXT: csrr a0, vlenb
2798 ; RV32-NEXT: li a1, 24
2799 ; RV32-NEXT: mul a0, a0, a1
2800 ; RV32-NEXT: add sp, sp, a0
2801 ; RV32-NEXT: addi sp, sp, 16
2804 ; RV64-LABEL: vp_bitreverse_nxv8i64:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT: lui a1, 4080
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vand.vx v16, v8, a1, v0.t
; RV64-NEXT: vsll.vi v16, v16, 24, v0.t
; RV64-NEXT: li a0, 255
; RV64-NEXT: slli a0, a0, 24
; RV64-NEXT: vand.vx v24, v8, a0, v0.t
; RV64-NEXT: vsll.vi v24, v24, 8, v0.t
; RV64-NEXT: vor.vv v16, v16, v24, v0.t
; RV64-NEXT: addi a2, sp, 16
; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV64-NEXT: li a2, 56
; RV64-NEXT: vsll.vx v24, v8, a2, v0.t
; RV64-NEXT: lui a3, 16
; RV64-NEXT: addiw a3, a3, -256
; RV64-NEXT: li a4, 40
; RV64-NEXT: vand.vx v16, v8, a3, v0.t
; RV64-NEXT: vsll.vx v16, v16, a4, v0.t
; RV64-NEXT: vor.vv v16, v24, v16, v0.t
; RV64-NEXT: addi a5, sp, 16
; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV64-NEXT: vor.vv v16, v16, v24, v0.t
; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t
; RV64-NEXT: vand.vx v16, v16, a3, v0.t
; RV64-NEXT: vor.vv v24, v16, v24, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t
; RV64-NEXT: vand.vx v16, v16, a1, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vor.vv v8, v8, v24, v0.t
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v16, v16, a0, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v16, v16, a0, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v16, v16, a0, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i64:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 8 x i64> @llvm.vp.bitreverse.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

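; The RV64 blocks build each 64-bit bitmanip constant from its 32-bit
; pattern: for 0x0f0f0f0f0f0f0f0f, lui 61681 places 0xf0f1 in bits 31:12
; (giving 0x0f0f1000), addiw -241 corrects that to 0x0f0f0f0f, and the
; slli/add pair replicates the pattern into bits 63:32.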
define <vscale x 8 x i64> @vp_bitreverse_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_nxv8i64_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1
; RV32-NEXT: lui a2, 16
; RV32-NEXT: addi a2, a2, -256
; RV32-NEXT: vand.vx v24, v8, a2
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3
; RV32-NEXT: vor.vv v16, v16, v24
; RV32-NEXT: addi a4, sp, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4
; RV32-NEXT: vsll.vi v0, v16, 24
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v24, v8, v16
; RV32-NEXT: vsll.vi v24, v24, 8
; RV32-NEXT: vor.vv v24, v0, v24
; RV32-NEXT: addi a5, sp, 16
; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v24, v0, v24
; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vx v0, v8, a3
; RV32-NEXT: vand.vx v0, v0, a2
; RV32-NEXT: vsrl.vx v24, v8, a1
; RV32-NEXT: vor.vv v24, v0, v24
; RV32-NEXT: vsrl.vi v0, v8, 8
; RV32-NEXT: vand.vv v16, v0, v16
; RV32-NEXT: vsrl.vi v8, v8, 24
; RV32-NEXT: vand.vx v8, v8, a4
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: vor.vv v8, v8, v24
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v16, v8, 4
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vand.vv v8, v8, v24
; RV32-NEXT: vsll.vi v8, v8, 4
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v16, v8, 2
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vand.vv v8, v8, v24
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vand.vv v8, v8, v24
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_nxv8i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: lui a1, 4080
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vand.vx v16, v8, a1
; RV64-NEXT: vsll.vi v16, v16, 24
; RV64-NEXT: li a0, 255
; RV64-NEXT: slli a0, a0, 24
; RV64-NEXT: vand.vx v24, v8, a0
; RV64-NEXT: vsll.vi v24, v24, 8
; RV64-NEXT: vor.vv v16, v16, v24
; RV64-NEXT: li a2, 56
; RV64-NEXT: vsll.vx v24, v8, a2
; RV64-NEXT: lui a3, 16
; RV64-NEXT: addiw a3, a3, -256
; RV64-NEXT: vand.vx v0, v8, a3
; RV64-NEXT: li a4, 40
; RV64-NEXT: vsll.vx v0, v0, a4
; RV64-NEXT: vor.vv v24, v24, v0
; RV64-NEXT: vor.vv v16, v24, v16
; RV64-NEXT: vsrl.vx v24, v8, a2
; RV64-NEXT: vsrl.vx v0, v8, a4
; RV64-NEXT: vand.vx v0, v0, a3
; RV64-NEXT: vor.vv v24, v0, v24
; RV64-NEXT: vsrl.vi v0, v8, 24
; RV64-NEXT: vand.vx v0, v0, a1
; RV64-NEXT: vsrl.vi v8, v8, 8
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vor.vv v8, v8, v0
; RV64-NEXT: vor.vv v8, v8, v24
; RV64-NEXT: vor.vv v8, v16, v8
; RV64-NEXT: vsrl.vi v16, v8, 4
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v16, v16, a0
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vsll.vi v8, v8, 4
; RV64-NEXT: vor.vv v8, v16, v8
; RV64-NEXT: vsrl.vi v16, v8, 2
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v16, v16, a0
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vsll.vi v8, v8, 2
; RV64-NEXT: vor.vv v8, v16, v8
; RV64-NEXT: vsrl.vi v16, v8, 1
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v16, v16, a0
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vadd.vv v8, v8, v8
; RV64-NEXT: vor.vv v8, v16, v8
; RV64-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i64_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
  %v = call <vscale x 8 x i64> @llvm.vp.bitreverse.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

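; In the RV32 blocks a 64-bit mask cannot live in one scalar register, so
; each 32-bit pattern is splatted under a temporary e32 vsetvli and then
; consumed as an e64 vector operand (vand.vv rather than vand.vx); the
; 0xff000000 byte-swap mask is instead built on the stack and splatted with
; a zero-stride vlse64.
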
; Test splitting. Use the i16 version for easier checking.
declare <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16>, <vscale x 64 x i1>, i32)

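; nxv64i16 needs two m8 register groups, so the lowering splits the vector:
; the high half runs on max(evl - vlenb*4, 0) elements (the sltu/addi/and
; sequence is a branchless saturating subtract) and the low half on
; min(evl, vlenb*4), vlenb*4 being the e16 element capacity of one m8 group.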
define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv64i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: srli a1, a2, 1
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v0, a1
; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: sub a1, a0, a2
; CHECK-NEXT: sltu a3, a0, a1
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a1, a3, a1
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v8, v16, 8, v0.t
; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t
; CHECK-NEXT: vor.vv v16, v16, v8, v0.t
; CHECK-NEXT: vsrl.vi v8, v16, 4, v0.t
; CHECK-NEXT: lui a1, 1
; CHECK-NEXT: addi a1, a1, -241
; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
; CHECK-NEXT: vand.vx v16, v16, a1, v0.t
; CHECK-NEXT: vsll.vi v16, v16, 4, v0.t
; CHECK-NEXT: vor.vv v16, v8, v16, v0.t
; CHECK-NEXT: vsrl.vi v8, v16, 2, v0.t
; CHECK-NEXT: lui a3, 3
; CHECK-NEXT: addi a3, a3, 819
; CHECK-NEXT: vand.vx v8, v8, a3, v0.t
; CHECK-NEXT: vand.vx v16, v16, a3, v0.t
; CHECK-NEXT: vsll.vi v16, v16, 2, v0.t
; CHECK-NEXT: vor.vv v16, v8, v16, v0.t
; CHECK-NEXT: vsrl.vi v8, v16, 1, v0.t
; CHECK-NEXT: lui a4, 5
; CHECK-NEXT: addi a4, a4, 1365
; CHECK-NEXT: vand.vx v8, v8, a4, v0.t
; CHECK-NEXT: vand.vx v16, v16, a4, v0.t
; CHECK-NEXT: vsll.vi v16, v16, 1, v0.t
; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
; CHECK-NEXT: addi a5, sp, 16
; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a0, a2, .LBB46_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: .LBB46_2:
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
; CHECK-NEXT: vand.vx v16, v16, a1, v0.t
; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
; CHECK-NEXT: vand.vx v16, v16, a3, v0.t
; CHECK-NEXT: vand.vx v8, v8, a3, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
; CHECK-NEXT: vand.vx v16, v16, a4, v0.t
; CHECK-NEXT: vand.vx v8, v8, a4, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vmv1r.v v24, v0
; CHECK-ZVBB-NEXT: csrr a1, vlenb
; CHECK-ZVBB-NEXT: srli a2, a1, 1
; CHECK-ZVBB-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2
; CHECK-ZVBB-NEXT: slli a1, a1, 2
; CHECK-ZVBB-NEXT: sub a2, a0, a1
; CHECK-ZVBB-NEXT: sltu a3, a0, a2
; CHECK-ZVBB-NEXT: addi a3, a3, -1
; CHECK-ZVBB-NEXT: and a2, a3, a2
; CHECK-ZVBB-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v16, v16, v0.t
; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB46_2
; CHECK-ZVBB-NEXT: # %bb.1:
; CHECK-ZVBB-NEXT: mv a0, a1
; CHECK-ZVBB-NEXT: .LBB46_2:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-ZVBB-NEXT: vmv1r.v v0, v24
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 %evl)
  ret <vscale x 64 x i16> %v
}

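; Both halves above reuse a single nxv64i1 mask: v0 is saved in v24, a
; vslidedown by vlenb/2 bytes (i.e. vlenb*4 mask bits) moves the high
; half's bits into position, and v0 is restored from v24 for the low half.
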
define <vscale x 64 x i16> @vp_bitreverse_nxv64i16_unmasked(<vscale x 64 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv64i16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v24, v16, 8
; CHECK-NEXT: vsll.vi v16, v16, 8
; CHECK-NEXT: vor.vv v16, v16, v24
; CHECK-NEXT: vsrl.vi v24, v16, 4
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: addi a2, a2, -241
; CHECK-NEXT: vand.vx v24, v24, a2
; CHECK-NEXT: vand.vx v16, v16, a2
; CHECK-NEXT: vsll.vi v16, v16, 4
; CHECK-NEXT: vor.vv v16, v24, v16
; CHECK-NEXT: vsrl.vi v24, v16, 2
; CHECK-NEXT: lui a3, 3
; CHECK-NEXT: addi a3, a3, 819
; CHECK-NEXT: vand.vx v24, v24, a3
; CHECK-NEXT: vand.vx v16, v16, a3
; CHECK-NEXT: vsll.vi v16, v16, 2
; CHECK-NEXT: vor.vv v16, v24, v16
; CHECK-NEXT: vsrl.vi v24, v16, 1
; CHECK-NEXT: lui a4, 5
; CHECK-NEXT: addi a4, a4, 1365
; CHECK-NEXT: vand.vx v24, v24, a4
; CHECK-NEXT: vand.vx v16, v16, a4
; CHECK-NEXT: vadd.vv v16, v16, v16
; CHECK-NEXT: vor.vv v16, v24, v16
; CHECK-NEXT: bltu a0, a1, .LBB47_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB47_2:
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v24, v8, 8
; CHECK-NEXT: vsll.vi v8, v8, 8
; CHECK-NEXT: vor.vv v8, v8, v24
; CHECK-NEXT: vsrl.vi v24, v8, 4
; CHECK-NEXT: vand.vx v24, v24, a2
; CHECK-NEXT: vand.vx v8, v8, a2
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v24, v8
; CHECK-NEXT: vsrl.vi v24, v8, 2
; CHECK-NEXT: vand.vx v24, v24, a3
; CHECK-NEXT: vand.vx v8, v8, a3
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v24, v8
; CHECK-NEXT: vsrl.vi v24, v8, 1
; CHECK-NEXT: vand.vx v24, v24, a4
; CHECK-NEXT: vand.vx v8, v8, a4
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v24, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16_unmasked:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: csrr a1, vlenb
; CHECK-ZVBB-NEXT: slli a1, a1, 2
; CHECK-ZVBB-NEXT: sub a2, a0, a1
; CHECK-ZVBB-NEXT: sltu a3, a0, a2
; CHECK-ZVBB-NEXT: addi a3, a3, -1
; CHECK-ZVBB-NEXT: and a2, a3, a2
; CHECK-ZVBB-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v16, v16
; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB47_2
; CHECK-ZVBB-NEXT: # %bb.1:
; CHECK-ZVBB-NEXT: mv a0, a1
; CHECK-ZVBB-NEXT: .LBB47_2:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8
; CHECK-ZVBB-NEXT: ret
  %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
  %v = call <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 %evl)
  ret <vscale x 64 x i16> %v
}

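; i9 is not a legal element type, so it is promoted: the value is
; bit-reversed as an i16 and the result is shifted right logically by
; 16 - 9 = 7 bits.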
declare <vscale x 1 x i9> @llvm.vp.bitreverse.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i1>, i32)
define <vscale x 1 x i9> @vp_bitreverse_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv1i9:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: addi a0, a0, -241
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT: lui a0, 3
; CHECK-NEXT: addi a0, a0, 819
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v8, v8, 7, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i9:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: vsrl.vi v8, v8, 7, v0.t
; CHECK-ZVBB-NEXT: ret
  %v = call <vscale x 1 x i9> @llvm.vp.bitreverse.nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i9> %v
}