1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
4 ; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
6 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb,+m -target-abi=ilp32d \
7 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
8 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb,+m -target-abi=lp64d \
9 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
11 declare <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)
13 define <vscale x 1 x i8> @vp_bitreverse_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
14 ; CHECK-LABEL: vp_bitreverse_nxv1i8:
16 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
17 ; CHECK-NEXT: vand.vi v9, v8, 15, v0.t
18 ; CHECK-NEXT: li a0, 51
19 ; CHECK-NEXT: vsll.vi v9, v9, 4, v0.t
20 ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t
21 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
22 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
23 ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
24 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
25 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
26 ; CHECK-NEXT: li a0, 85
27 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
28 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
29 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
30 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
31 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
32 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
33 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
36 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i8:
37 ; CHECK-ZVBB: # %bb.0:
38 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
39 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
40 ; CHECK-ZVBB-NEXT: ret
41 %v = call <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
42 ret <vscale x 1 x i8> %v
45 define <vscale x 1 x i8> @vp_bitreverse_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
46 ; CHECK-LABEL: vp_bitreverse_nxv1i8_unmasked:
48 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
49 ; CHECK-NEXT: vand.vi v9, v8, 15
50 ; CHECK-NEXT: vsrl.vi v8, v8, 4
51 ; CHECK-NEXT: li a0, 51
52 ; CHECK-NEXT: vsll.vi v9, v9, 4
53 ; CHECK-NEXT: vand.vi v8, v8, 15
54 ; CHECK-NEXT: vor.vv v8, v8, v9
55 ; CHECK-NEXT: vsrl.vi v9, v8, 2
56 ; CHECK-NEXT: vand.vx v8, v8, a0
57 ; CHECK-NEXT: vand.vx v9, v9, a0
58 ; CHECK-NEXT: li a0, 85
59 ; CHECK-NEXT: vsll.vi v8, v8, 2
60 ; CHECK-NEXT: vor.vv v8, v9, v8
61 ; CHECK-NEXT: vsrl.vi v9, v8, 1
62 ; CHECK-NEXT: vand.vx v8, v8, a0
63 ; CHECK-NEXT: vand.vx v9, v9, a0
64 ; CHECK-NEXT: vadd.vv v8, v8, v8
65 ; CHECK-NEXT: vor.vv v8, v9, v8
68 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i8_unmasked:
69 ; CHECK-ZVBB: # %bb.0:
70 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
71 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
72 ; CHECK-ZVBB-NEXT: ret
73 %v = call <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
74 ret <vscale x 1 x i8> %v
77 declare <vscale x 2 x i8> @llvm.vp.bitreverse.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
79 define <vscale x 2 x i8> @vp_bitreverse_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
80 ; CHECK-LABEL: vp_bitreverse_nxv2i8:
82 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
83 ; CHECK-NEXT: vand.vi v9, v8, 15, v0.t
84 ; CHECK-NEXT: li a0, 51
85 ; CHECK-NEXT: vsll.vi v9, v9, 4, v0.t
86 ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t
87 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
88 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
89 ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
90 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
91 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
92 ; CHECK-NEXT: li a0, 85
93 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
94 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
95 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
96 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
97 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
98 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
99 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
102 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i8:
103 ; CHECK-ZVBB: # %bb.0:
104 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
105 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
106 ; CHECK-ZVBB-NEXT: ret
107 %v = call <vscale x 2 x i8> @llvm.vp.bitreverse.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
108 ret <vscale x 2 x i8> %v
111 define <vscale x 2 x i8> @vp_bitreverse_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
112 ; CHECK-LABEL: vp_bitreverse_nxv2i8_unmasked:
114 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
115 ; CHECK-NEXT: vand.vi v9, v8, 15
116 ; CHECK-NEXT: vsrl.vi v8, v8, 4
117 ; CHECK-NEXT: li a0, 51
118 ; CHECK-NEXT: vsll.vi v9, v9, 4
119 ; CHECK-NEXT: vand.vi v8, v8, 15
120 ; CHECK-NEXT: vor.vv v8, v8, v9
121 ; CHECK-NEXT: vsrl.vi v9, v8, 2
122 ; CHECK-NEXT: vand.vx v8, v8, a0
123 ; CHECK-NEXT: vand.vx v9, v9, a0
124 ; CHECK-NEXT: li a0, 85
125 ; CHECK-NEXT: vsll.vi v8, v8, 2
126 ; CHECK-NEXT: vor.vv v8, v9, v8
127 ; CHECK-NEXT: vsrl.vi v9, v8, 1
128 ; CHECK-NEXT: vand.vx v8, v8, a0
129 ; CHECK-NEXT: vand.vx v9, v9, a0
130 ; CHECK-NEXT: vadd.vv v8, v8, v8
131 ; CHECK-NEXT: vor.vv v8, v9, v8
134 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i8_unmasked:
135 ; CHECK-ZVBB: # %bb.0:
136 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
137 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
138 ; CHECK-ZVBB-NEXT: ret
139 %v = call <vscale x 2 x i8> @llvm.vp.bitreverse.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
140 ret <vscale x 2 x i8> %v
143 declare <vscale x 4 x i8> @llvm.vp.bitreverse.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32)
145 define <vscale x 4 x i8> @vp_bitreverse_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
146 ; CHECK-LABEL: vp_bitreverse_nxv4i8:
148 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
149 ; CHECK-NEXT: vand.vi v9, v8, 15, v0.t
150 ; CHECK-NEXT: li a0, 51
151 ; CHECK-NEXT: vsll.vi v9, v9, 4, v0.t
152 ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t
153 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
154 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
155 ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
156 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
157 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
158 ; CHECK-NEXT: li a0, 85
159 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
160 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
161 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
162 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
163 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
164 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
165 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
168 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i8:
169 ; CHECK-ZVBB: # %bb.0:
170 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
171 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
172 ; CHECK-ZVBB-NEXT: ret
173 %v = call <vscale x 4 x i8> @llvm.vp.bitreverse.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
174 ret <vscale x 4 x i8> %v
177 define <vscale x 4 x i8> @vp_bitreverse_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
178 ; CHECK-LABEL: vp_bitreverse_nxv4i8_unmasked:
180 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
181 ; CHECK-NEXT: vand.vi v9, v8, 15
182 ; CHECK-NEXT: vsrl.vi v8, v8, 4
183 ; CHECK-NEXT: li a0, 51
184 ; CHECK-NEXT: vsll.vi v9, v9, 4
185 ; CHECK-NEXT: vand.vi v8, v8, 15
186 ; CHECK-NEXT: vor.vv v8, v8, v9
187 ; CHECK-NEXT: vsrl.vi v9, v8, 2
188 ; CHECK-NEXT: vand.vx v8, v8, a0
189 ; CHECK-NEXT: vand.vx v9, v9, a0
190 ; CHECK-NEXT: li a0, 85
191 ; CHECK-NEXT: vsll.vi v8, v8, 2
192 ; CHECK-NEXT: vor.vv v8, v9, v8
193 ; CHECK-NEXT: vsrl.vi v9, v8, 1
194 ; CHECK-NEXT: vand.vx v8, v8, a0
195 ; CHECK-NEXT: vand.vx v9, v9, a0
196 ; CHECK-NEXT: vadd.vv v8, v8, v8
197 ; CHECK-NEXT: vor.vv v8, v9, v8
200 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i8_unmasked:
201 ; CHECK-ZVBB: # %bb.0:
202 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
203 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
204 ; CHECK-ZVBB-NEXT: ret
205 %v = call <vscale x 4 x i8> @llvm.vp.bitreverse.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
206 ret <vscale x 4 x i8> %v
209 declare <vscale x 8 x i8> @llvm.vp.bitreverse.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i32)
211 define <vscale x 8 x i8> @vp_bitreverse_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
212 ; CHECK-LABEL: vp_bitreverse_nxv8i8:
214 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
215 ; CHECK-NEXT: vand.vi v9, v8, 15, v0.t
216 ; CHECK-NEXT: li a0, 51
217 ; CHECK-NEXT: vsll.vi v9, v9, 4, v0.t
218 ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t
219 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
220 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
221 ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
222 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
223 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
224 ; CHECK-NEXT: li a0, 85
225 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
226 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
227 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
228 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
229 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
230 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
231 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
234 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i8:
235 ; CHECK-ZVBB: # %bb.0:
236 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma
237 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
238 ; CHECK-ZVBB-NEXT: ret
239 %v = call <vscale x 8 x i8> @llvm.vp.bitreverse.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl)
240 ret <vscale x 8 x i8> %v
243 define <vscale x 8 x i8> @vp_bitreverse_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
244 ; CHECK-LABEL: vp_bitreverse_nxv8i8_unmasked:
246 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
247 ; CHECK-NEXT: vand.vi v9, v8, 15
248 ; CHECK-NEXT: vsrl.vi v8, v8, 4
249 ; CHECK-NEXT: li a0, 51
250 ; CHECK-NEXT: vsll.vi v9, v9, 4
251 ; CHECK-NEXT: vand.vi v8, v8, 15
252 ; CHECK-NEXT: vor.vv v8, v8, v9
253 ; CHECK-NEXT: vsrl.vi v9, v8, 2
254 ; CHECK-NEXT: vand.vx v8, v8, a0
255 ; CHECK-NEXT: vand.vx v9, v9, a0
256 ; CHECK-NEXT: li a0, 85
257 ; CHECK-NEXT: vsll.vi v8, v8, 2
258 ; CHECK-NEXT: vor.vv v8, v9, v8
259 ; CHECK-NEXT: vsrl.vi v9, v8, 1
260 ; CHECK-NEXT: vand.vx v8, v8, a0
261 ; CHECK-NEXT: vand.vx v9, v9, a0
262 ; CHECK-NEXT: vadd.vv v8, v8, v8
263 ; CHECK-NEXT: vor.vv v8, v9, v8
266 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i8_unmasked:
267 ; CHECK-ZVBB: # %bb.0:
268 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma
269 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
270 ; CHECK-ZVBB-NEXT: ret
271 %v = call <vscale x 8 x i8> @llvm.vp.bitreverse.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
272 ret <vscale x 8 x i8> %v
275 declare <vscale x 16 x i8> @llvm.vp.bitreverse.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32)
277 define <vscale x 16 x i8> @vp_bitreverse_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
278 ; CHECK-LABEL: vp_bitreverse_nxv16i8:
280 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
281 ; CHECK-NEXT: vand.vi v10, v8, 15, v0.t
282 ; CHECK-NEXT: li a0, 51
283 ; CHECK-NEXT: vsll.vi v10, v10, 4, v0.t
284 ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t
285 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
286 ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
287 ; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t
288 ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
289 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
290 ; CHECK-NEXT: li a0, 85
291 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
292 ; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
293 ; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
294 ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
295 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
296 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
297 ; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
300 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i8:
301 ; CHECK-ZVBB: # %bb.0:
302 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma
303 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
304 ; CHECK-ZVBB-NEXT: ret
305 %v = call <vscale x 16 x i8> @llvm.vp.bitreverse.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 %evl)
306 ret <vscale x 16 x i8> %v
309 define <vscale x 16 x i8> @vp_bitreverse_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
310 ; CHECK-LABEL: vp_bitreverse_nxv16i8_unmasked:
312 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
313 ; CHECK-NEXT: vand.vi v10, v8, 15
314 ; CHECK-NEXT: vsrl.vi v8, v8, 4
315 ; CHECK-NEXT: li a0, 51
316 ; CHECK-NEXT: vsll.vi v10, v10, 4
317 ; CHECK-NEXT: vand.vi v8, v8, 15
318 ; CHECK-NEXT: vor.vv v8, v8, v10
319 ; CHECK-NEXT: vsrl.vi v10, v8, 2
320 ; CHECK-NEXT: vand.vx v8, v8, a0
321 ; CHECK-NEXT: vand.vx v10, v10, a0
322 ; CHECK-NEXT: li a0, 85
323 ; CHECK-NEXT: vsll.vi v8, v8, 2
324 ; CHECK-NEXT: vor.vv v8, v10, v8
325 ; CHECK-NEXT: vsrl.vi v10, v8, 1
326 ; CHECK-NEXT: vand.vx v8, v8, a0
327 ; CHECK-NEXT: vand.vx v10, v10, a0
328 ; CHECK-NEXT: vadd.vv v8, v8, v8
329 ; CHECK-NEXT: vor.vv v8, v10, v8
332 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i8_unmasked:
333 ; CHECK-ZVBB: # %bb.0:
334 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma
335 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
336 ; CHECK-ZVBB-NEXT: ret
337 %v = call <vscale x 16 x i8> @llvm.vp.bitreverse.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
338 ret <vscale x 16 x i8> %v
341 declare <vscale x 32 x i8> @llvm.vp.bitreverse.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i1>, i32)
343 define <vscale x 32 x i8> @vp_bitreverse_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
344 ; CHECK-LABEL: vp_bitreverse_nxv32i8:
346 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
347 ; CHECK-NEXT: vand.vi v12, v8, 15, v0.t
348 ; CHECK-NEXT: li a0, 51
349 ; CHECK-NEXT: vsll.vi v12, v12, 4, v0.t
350 ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t
351 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
352 ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
353 ; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t
354 ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
355 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
356 ; CHECK-NEXT: li a0, 85
357 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
358 ; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
359 ; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
360 ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
361 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
362 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
363 ; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
366 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i8:
367 ; CHECK-ZVBB: # %bb.0:
368 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma
369 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
370 ; CHECK-ZVBB-NEXT: ret
371 %v = call <vscale x 32 x i8> @llvm.vp.bitreverse.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 %evl)
372 ret <vscale x 32 x i8> %v
375 define <vscale x 32 x i8> @vp_bitreverse_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
376 ; CHECK-LABEL: vp_bitreverse_nxv32i8_unmasked:
378 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
379 ; CHECK-NEXT: vand.vi v12, v8, 15
380 ; CHECK-NEXT: vsrl.vi v8, v8, 4
381 ; CHECK-NEXT: li a0, 51
382 ; CHECK-NEXT: vsll.vi v12, v12, 4
383 ; CHECK-NEXT: vand.vi v8, v8, 15
384 ; CHECK-NEXT: vor.vv v8, v8, v12
385 ; CHECK-NEXT: vsrl.vi v12, v8, 2
386 ; CHECK-NEXT: vand.vx v8, v8, a0
387 ; CHECK-NEXT: vand.vx v12, v12, a0
388 ; CHECK-NEXT: li a0, 85
389 ; CHECK-NEXT: vsll.vi v8, v8, 2
390 ; CHECK-NEXT: vor.vv v8, v12, v8
391 ; CHECK-NEXT: vsrl.vi v12, v8, 1
392 ; CHECK-NEXT: vand.vx v8, v8, a0
393 ; CHECK-NEXT: vand.vx v12, v12, a0
394 ; CHECK-NEXT: vadd.vv v8, v8, v8
395 ; CHECK-NEXT: vor.vv v8, v12, v8
398 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i8_unmasked:
399 ; CHECK-ZVBB: # %bb.0:
400 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma
401 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
402 ; CHECK-ZVBB-NEXT: ret
403 %v = call <vscale x 32 x i8> @llvm.vp.bitreverse.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
404 ret <vscale x 32 x i8> %v
407 declare <vscale x 64 x i8> @llvm.vp.bitreverse.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i1>, i32)
409 define <vscale x 64 x i8> @vp_bitreverse_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
410 ; CHECK-LABEL: vp_bitreverse_nxv64i8:
412 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
413 ; CHECK-NEXT: vand.vi v16, v8, 15, v0.t
414 ; CHECK-NEXT: li a0, 51
415 ; CHECK-NEXT: vsll.vi v16, v16, 4, v0.t
416 ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t
417 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
418 ; CHECK-NEXT: vor.vv v16, v8, v16, v0.t
419 ; CHECK-NEXT: vsrl.vi v8, v16, 2, v0.t
420 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
421 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
422 ; CHECK-NEXT: li a0, 85
423 ; CHECK-NEXT: vsll.vi v16, v16, 2, v0.t
424 ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
425 ; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
426 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
427 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
428 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
429 ; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
432 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i8:
433 ; CHECK-ZVBB: # %bb.0:
434 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma
435 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
436 ; CHECK-ZVBB-NEXT: ret
437 %v = call <vscale x 64 x i8> @llvm.vp.bitreverse.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 %evl)
438 ret <vscale x 64 x i8> %v
441 define <vscale x 64 x i8> @vp_bitreverse_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
442 ; CHECK-LABEL: vp_bitreverse_nxv64i8_unmasked:
444 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
445 ; CHECK-NEXT: vand.vi v16, v8, 15
446 ; CHECK-NEXT: vsrl.vi v8, v8, 4
447 ; CHECK-NEXT: li a0, 51
448 ; CHECK-NEXT: vsll.vi v16, v16, 4
449 ; CHECK-NEXT: vand.vi v8, v8, 15
450 ; CHECK-NEXT: vor.vv v8, v8, v16
451 ; CHECK-NEXT: vsrl.vi v16, v8, 2
452 ; CHECK-NEXT: vand.vx v8, v8, a0
453 ; CHECK-NEXT: vand.vx v16, v16, a0
454 ; CHECK-NEXT: li a0, 85
455 ; CHECK-NEXT: vsll.vi v8, v8, 2
456 ; CHECK-NEXT: vor.vv v8, v16, v8
457 ; CHECK-NEXT: vsrl.vi v16, v8, 1
458 ; CHECK-NEXT: vand.vx v8, v8, a0
459 ; CHECK-NEXT: vand.vx v16, v16, a0
460 ; CHECK-NEXT: vadd.vv v8, v8, v8
461 ; CHECK-NEXT: vor.vv v8, v16, v8
464 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i8_unmasked:
465 ; CHECK-ZVBB: # %bb.0:
466 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma
467 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
468 ; CHECK-ZVBB-NEXT: ret
469 %v = call <vscale x 64 x i8> @llvm.vp.bitreverse.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> splat (i1 true), i32 %evl)
470 ret <vscale x 64 x i8> %v
473 declare <vscale x 1 x i16> @llvm.vp.bitreverse.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
475 define <vscale x 1 x i16> @vp_bitreverse_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
476 ; CHECK-LABEL: vp_bitreverse_nxv1i16:
478 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
479 ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
480 ; CHECK-NEXT: lui a0, 1
481 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
482 ; CHECK-NEXT: addi a0, a0, -241
483 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
484 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
485 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
486 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
487 ; CHECK-NEXT: lui a0, 3
488 ; CHECK-NEXT: addi a0, a0, 819
489 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
490 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
491 ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
492 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
493 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
494 ; CHECK-NEXT: lui a0, 5
495 ; CHECK-NEXT: addi a0, a0, 1365
496 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
497 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
498 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
499 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
500 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
501 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
502 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
505 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16:
506 ; CHECK-ZVBB: # %bb.0:
507 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
508 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
509 ; CHECK-ZVBB-NEXT: ret
510 %v = call <vscale x 1 x i16> @llvm.vp.bitreverse.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
511 ret <vscale x 1 x i16> %v
514 define <vscale x 1 x i16> @vp_bitreverse_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
515 ; CHECK-LABEL: vp_bitreverse_nxv1i16_unmasked:
517 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
518 ; CHECK-NEXT: vsrl.vi v9, v8, 8
519 ; CHECK-NEXT: vsll.vi v8, v8, 8
520 ; CHECK-NEXT: lui a0, 1
521 ; CHECK-NEXT: vor.vv v8, v8, v9
522 ; CHECK-NEXT: addi a0, a0, -241
523 ; CHECK-NEXT: vsrl.vi v9, v8, 4
524 ; CHECK-NEXT: vand.vx v8, v8, a0
525 ; CHECK-NEXT: vand.vx v9, v9, a0
526 ; CHECK-NEXT: lui a0, 3
527 ; CHECK-NEXT: addi a0, a0, 819
528 ; CHECK-NEXT: vsll.vi v8, v8, 4
529 ; CHECK-NEXT: vor.vv v8, v9, v8
530 ; CHECK-NEXT: vsrl.vi v9, v8, 2
531 ; CHECK-NEXT: vand.vx v8, v8, a0
532 ; CHECK-NEXT: vand.vx v9, v9, a0
533 ; CHECK-NEXT: lui a0, 5
534 ; CHECK-NEXT: addi a0, a0, 1365
535 ; CHECK-NEXT: vsll.vi v8, v8, 2
536 ; CHECK-NEXT: vor.vv v8, v9, v8
537 ; CHECK-NEXT: vsrl.vi v9, v8, 1
538 ; CHECK-NEXT: vand.vx v8, v8, a0
539 ; CHECK-NEXT: vand.vx v9, v9, a0
540 ; CHECK-NEXT: vadd.vv v8, v8, v8
541 ; CHECK-NEXT: vor.vv v8, v9, v8
544 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16_unmasked:
545 ; CHECK-ZVBB: # %bb.0:
546 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
547 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
548 ; CHECK-ZVBB-NEXT: ret
549 %v = call <vscale x 1 x i16> @llvm.vp.bitreverse.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
550 ret <vscale x 1 x i16> %v
553 declare <vscale x 2 x i16> @llvm.vp.bitreverse.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
555 define <vscale x 2 x i16> @vp_bitreverse_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
556 ; CHECK-LABEL: vp_bitreverse_nxv2i16:
558 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
559 ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
560 ; CHECK-NEXT: lui a0, 1
561 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
562 ; CHECK-NEXT: addi a0, a0, -241
563 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
564 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
565 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
566 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
567 ; CHECK-NEXT: lui a0, 3
568 ; CHECK-NEXT: addi a0, a0, 819
569 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
570 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
571 ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
572 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
573 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
574 ; CHECK-NEXT: lui a0, 5
575 ; CHECK-NEXT: addi a0, a0, 1365
576 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
577 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
578 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
579 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
580 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
581 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
582 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
585 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16:
586 ; CHECK-ZVBB: # %bb.0:
587 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
588 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
589 ; CHECK-ZVBB-NEXT: ret
590 %v = call <vscale x 2 x i16> @llvm.vp.bitreverse.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
591 ret <vscale x 2 x i16> %v
594 define <vscale x 2 x i16> @vp_bitreverse_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
595 ; CHECK-LABEL: vp_bitreverse_nxv2i16_unmasked:
597 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
598 ; CHECK-NEXT: vsrl.vi v9, v8, 8
599 ; CHECK-NEXT: vsll.vi v8, v8, 8
600 ; CHECK-NEXT: lui a0, 1
601 ; CHECK-NEXT: vor.vv v8, v8, v9
602 ; CHECK-NEXT: addi a0, a0, -241
603 ; CHECK-NEXT: vsrl.vi v9, v8, 4
604 ; CHECK-NEXT: vand.vx v8, v8, a0
605 ; CHECK-NEXT: vand.vx v9, v9, a0
606 ; CHECK-NEXT: lui a0, 3
607 ; CHECK-NEXT: addi a0, a0, 819
608 ; CHECK-NEXT: vsll.vi v8, v8, 4
609 ; CHECK-NEXT: vor.vv v8, v9, v8
610 ; CHECK-NEXT: vsrl.vi v9, v8, 2
611 ; CHECK-NEXT: vand.vx v8, v8, a0
612 ; CHECK-NEXT: vand.vx v9, v9, a0
613 ; CHECK-NEXT: lui a0, 5
614 ; CHECK-NEXT: addi a0, a0, 1365
615 ; CHECK-NEXT: vsll.vi v8, v8, 2
616 ; CHECK-NEXT: vor.vv v8, v9, v8
617 ; CHECK-NEXT: vsrl.vi v9, v8, 1
618 ; CHECK-NEXT: vand.vx v8, v8, a0
619 ; CHECK-NEXT: vand.vx v9, v9, a0
620 ; CHECK-NEXT: vadd.vv v8, v8, v8
621 ; CHECK-NEXT: vor.vv v8, v9, v8
624 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16_unmasked:
625 ; CHECK-ZVBB: # %bb.0:
626 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
627 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
628 ; CHECK-ZVBB-NEXT: ret
629 %v = call <vscale x 2 x i16> @llvm.vp.bitreverse.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
630 ret <vscale x 2 x i16> %v
633 declare <vscale x 4 x i16> @llvm.vp.bitreverse.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
635 define <vscale x 4 x i16> @vp_bitreverse_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
636 ; CHECK-LABEL: vp_bitreverse_nxv4i16:
638 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
639 ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
640 ; CHECK-NEXT: lui a0, 1
641 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
642 ; CHECK-NEXT: addi a0, a0, -241
643 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
644 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
645 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
646 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
647 ; CHECK-NEXT: lui a0, 3
648 ; CHECK-NEXT: addi a0, a0, 819
649 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
650 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
651 ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
652 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
653 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
654 ; CHECK-NEXT: lui a0, 5
655 ; CHECK-NEXT: addi a0, a0, 1365
656 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
657 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
658 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
659 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
660 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
661 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
662 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
665 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16:
666 ; CHECK-ZVBB: # %bb.0:
667 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
668 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
669 ; CHECK-ZVBB-NEXT: ret
670 %v = call <vscale x 4 x i16> @llvm.vp.bitreverse.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
671 ret <vscale x 4 x i16> %v
674 define <vscale x 4 x i16> @vp_bitreverse_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
675 ; CHECK-LABEL: vp_bitreverse_nxv4i16_unmasked:
677 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
678 ; CHECK-NEXT: vsrl.vi v9, v8, 8
679 ; CHECK-NEXT: vsll.vi v8, v8, 8
680 ; CHECK-NEXT: lui a0, 1
681 ; CHECK-NEXT: vor.vv v8, v8, v9
682 ; CHECK-NEXT: addi a0, a0, -241
683 ; CHECK-NEXT: vsrl.vi v9, v8, 4
684 ; CHECK-NEXT: vand.vx v8, v8, a0
685 ; CHECK-NEXT: vand.vx v9, v9, a0
686 ; CHECK-NEXT: lui a0, 3
687 ; CHECK-NEXT: addi a0, a0, 819
688 ; CHECK-NEXT: vsll.vi v8, v8, 4
689 ; CHECK-NEXT: vor.vv v8, v9, v8
690 ; CHECK-NEXT: vsrl.vi v9, v8, 2
691 ; CHECK-NEXT: vand.vx v8, v8, a0
692 ; CHECK-NEXT: vand.vx v9, v9, a0
693 ; CHECK-NEXT: lui a0, 5
694 ; CHECK-NEXT: addi a0, a0, 1365
695 ; CHECK-NEXT: vsll.vi v8, v8, 2
696 ; CHECK-NEXT: vor.vv v8, v9, v8
697 ; CHECK-NEXT: vsrl.vi v9, v8, 1
698 ; CHECK-NEXT: vand.vx v8, v8, a0
699 ; CHECK-NEXT: vand.vx v9, v9, a0
700 ; CHECK-NEXT: vadd.vv v8, v8, v8
701 ; CHECK-NEXT: vor.vv v8, v9, v8
704 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16_unmasked:
705 ; CHECK-ZVBB: # %bb.0:
706 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
707 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
708 ; CHECK-ZVBB-NEXT: ret
709 %v = call <vscale x 4 x i16> @llvm.vp.bitreverse.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
710 ret <vscale x 4 x i16> %v
713 declare <vscale x 8 x i16> @llvm.vp.bitreverse.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
715 define <vscale x 8 x i16> @vp_bitreverse_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
716 ; CHECK-LABEL: vp_bitreverse_nxv8i16:
718 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
719 ; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t
720 ; CHECK-NEXT: lui a0, 1
721 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
722 ; CHECK-NEXT: addi a0, a0, -241
723 ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
724 ; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
725 ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
726 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
727 ; CHECK-NEXT: lui a0, 3
728 ; CHECK-NEXT: addi a0, a0, 819
729 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
730 ; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
731 ; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t
732 ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
733 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
734 ; CHECK-NEXT: lui a0, 5
735 ; CHECK-NEXT: addi a0, a0, 1365
736 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
737 ; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
738 ; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
739 ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
740 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
741 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
742 ; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
745 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16:
746 ; CHECK-ZVBB: # %bb.0:
747 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
748 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
749 ; CHECK-ZVBB-NEXT: ret
750 %v = call <vscale x 8 x i16> @llvm.vp.bitreverse.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl)
751 ret <vscale x 8 x i16> %v
754 define <vscale x 8 x i16> @vp_bitreverse_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
755 ; CHECK-LABEL: vp_bitreverse_nxv8i16_unmasked:
757 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
758 ; CHECK-NEXT: vsrl.vi v10, v8, 8
759 ; CHECK-NEXT: vsll.vi v8, v8, 8
760 ; CHECK-NEXT: lui a0, 1
761 ; CHECK-NEXT: vor.vv v8, v8, v10
762 ; CHECK-NEXT: addi a0, a0, -241
763 ; CHECK-NEXT: vsrl.vi v10, v8, 4
764 ; CHECK-NEXT: vand.vx v8, v8, a0
765 ; CHECK-NEXT: vand.vx v10, v10, a0
766 ; CHECK-NEXT: lui a0, 3
767 ; CHECK-NEXT: addi a0, a0, 819
768 ; CHECK-NEXT: vsll.vi v8, v8, 4
769 ; CHECK-NEXT: vor.vv v8, v10, v8
770 ; CHECK-NEXT: vsrl.vi v10, v8, 2
771 ; CHECK-NEXT: vand.vx v8, v8, a0
772 ; CHECK-NEXT: vand.vx v10, v10, a0
773 ; CHECK-NEXT: lui a0, 5
774 ; CHECK-NEXT: addi a0, a0, 1365
775 ; CHECK-NEXT: vsll.vi v8, v8, 2
776 ; CHECK-NEXT: vor.vv v8, v10, v8
777 ; CHECK-NEXT: vsrl.vi v10, v8, 1
778 ; CHECK-NEXT: vand.vx v8, v8, a0
779 ; CHECK-NEXT: vand.vx v10, v10, a0
780 ; CHECK-NEXT: vadd.vv v8, v8, v8
781 ; CHECK-NEXT: vor.vv v8, v10, v8
784 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16_unmasked:
785 ; CHECK-ZVBB: # %bb.0:
786 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
787 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
788 ; CHECK-ZVBB-NEXT: ret
789 %v = call <vscale x 8 x i16> @llvm.vp.bitreverse.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
790 ret <vscale x 8 x i16> %v
793 declare <vscale x 16 x i16> @llvm.vp.bitreverse.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
795 define <vscale x 16 x i16> @vp_bitreverse_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
796 ; CHECK-LABEL: vp_bitreverse_nxv16i16:
798 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
799 ; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t
800 ; CHECK-NEXT: lui a0, 1
801 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
802 ; CHECK-NEXT: addi a0, a0, -241
803 ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
804 ; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
805 ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
806 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
807 ; CHECK-NEXT: lui a0, 3
808 ; CHECK-NEXT: addi a0, a0, 819
809 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
810 ; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
811 ; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t
812 ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
813 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
814 ; CHECK-NEXT: lui a0, 5
815 ; CHECK-NEXT: addi a0, a0, 1365
816 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
817 ; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
818 ; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
819 ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
820 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
821 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
822 ; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
825 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16:
826 ; CHECK-ZVBB: # %bb.0:
827 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma
828 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
829 ; CHECK-ZVBB-NEXT: ret
830 %v = call <vscale x 16 x i16> @llvm.vp.bitreverse.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl)
831 ret <vscale x 16 x i16> %v
834 define <vscale x 16 x i16> @vp_bitreverse_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
835 ; CHECK-LABEL: vp_bitreverse_nxv16i16_unmasked:
837 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
838 ; CHECK-NEXT: vsrl.vi v12, v8, 8
839 ; CHECK-NEXT: vsll.vi v8, v8, 8
840 ; CHECK-NEXT: lui a0, 1
841 ; CHECK-NEXT: vor.vv v8, v8, v12
842 ; CHECK-NEXT: addi a0, a0, -241
843 ; CHECK-NEXT: vsrl.vi v12, v8, 4
844 ; CHECK-NEXT: vand.vx v8, v8, a0
845 ; CHECK-NEXT: vand.vx v12, v12, a0
846 ; CHECK-NEXT: lui a0, 3
847 ; CHECK-NEXT: addi a0, a0, 819
848 ; CHECK-NEXT: vsll.vi v8, v8, 4
849 ; CHECK-NEXT: vor.vv v8, v12, v8
850 ; CHECK-NEXT: vsrl.vi v12, v8, 2
851 ; CHECK-NEXT: vand.vx v8, v8, a0
852 ; CHECK-NEXT: vand.vx v12, v12, a0
853 ; CHECK-NEXT: lui a0, 5
854 ; CHECK-NEXT: addi a0, a0, 1365
855 ; CHECK-NEXT: vsll.vi v8, v8, 2
856 ; CHECK-NEXT: vor.vv v8, v12, v8
857 ; CHECK-NEXT: vsrl.vi v12, v8, 1
858 ; CHECK-NEXT: vand.vx v8, v8, a0
859 ; CHECK-NEXT: vand.vx v12, v12, a0
860 ; CHECK-NEXT: vadd.vv v8, v8, v8
861 ; CHECK-NEXT: vor.vv v8, v12, v8
864 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16_unmasked:
865 ; CHECK-ZVBB: # %bb.0:
866 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma
867 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
868 ; CHECK-ZVBB-NEXT: ret
869 %v = call <vscale x 16 x i16> @llvm.vp.bitreverse.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
870 ret <vscale x 16 x i16> %v
873 declare <vscale x 32 x i16> @llvm.vp.bitreverse.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i1>, i32)
875 define <vscale x 32 x i16> @vp_bitreverse_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
876 ; CHECK-LABEL: vp_bitreverse_nxv32i16:
878 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
879 ; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
880 ; CHECK-NEXT: lui a0, 1
881 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
882 ; CHECK-NEXT: addi a0, a0, -241
883 ; CHECK-NEXT: vor.vv v16, v8, v16, v0.t
884 ; CHECK-NEXT: vsrl.vi v8, v16, 4, v0.t
885 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
886 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
887 ; CHECK-NEXT: lui a0, 3
888 ; CHECK-NEXT: addi a0, a0, 819
889 ; CHECK-NEXT: vsll.vi v16, v16, 4, v0.t
890 ; CHECK-NEXT: vor.vv v16, v8, v16, v0.t
891 ; CHECK-NEXT: vsrl.vi v8, v16, 2, v0.t
892 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
893 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
894 ; CHECK-NEXT: lui a0, 5
895 ; CHECK-NEXT: addi a0, a0, 1365
896 ; CHECK-NEXT: vsll.vi v16, v16, 2, v0.t
897 ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
898 ; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
899 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
900 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
901 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
902 ; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
905 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16:
906 ; CHECK-ZVBB: # %bb.0:
907 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
908 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
909 ; CHECK-ZVBB-NEXT: ret
910 %v = call <vscale x 32 x i16> @llvm.vp.bitreverse.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
911 ret <vscale x 32 x i16> %v
914 define <vscale x 32 x i16> @vp_bitreverse_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
915 ; CHECK-LABEL: vp_bitreverse_nxv32i16_unmasked:
917 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
918 ; CHECK-NEXT: vsrl.vi v16, v8, 8
919 ; CHECK-NEXT: vsll.vi v8, v8, 8
920 ; CHECK-NEXT: lui a0, 1
921 ; CHECK-NEXT: vor.vv v8, v8, v16
922 ; CHECK-NEXT: addi a0, a0, -241
923 ; CHECK-NEXT: vsrl.vi v16, v8, 4
924 ; CHECK-NEXT: vand.vx v8, v8, a0
925 ; CHECK-NEXT: vand.vx v16, v16, a0
926 ; CHECK-NEXT: lui a0, 3
927 ; CHECK-NEXT: addi a0, a0, 819
928 ; CHECK-NEXT: vsll.vi v8, v8, 4
929 ; CHECK-NEXT: vor.vv v8, v16, v8
930 ; CHECK-NEXT: vsrl.vi v16, v8, 2
931 ; CHECK-NEXT: vand.vx v8, v8, a0
932 ; CHECK-NEXT: vand.vx v16, v16, a0
933 ; CHECK-NEXT: lui a0, 5
934 ; CHECK-NEXT: addi a0, a0, 1365
935 ; CHECK-NEXT: vsll.vi v8, v8, 2
936 ; CHECK-NEXT: vor.vv v8, v16, v8
937 ; CHECK-NEXT: vsrl.vi v16, v8, 1
938 ; CHECK-NEXT: vand.vx v8, v8, a0
939 ; CHECK-NEXT: vand.vx v16, v16, a0
940 ; CHECK-NEXT: vadd.vv v8, v8, v8
941 ; CHECK-NEXT: vor.vv v8, v16, v8
944 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16_unmasked:
945 ; CHECK-ZVBB: # %bb.0:
946 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
947 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
948 ; CHECK-ZVBB-NEXT: ret
949 %v = call <vscale x 32 x i16> @llvm.vp.bitreverse.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
950 ret <vscale x 32 x i16> %v
953 declare <vscale x 1 x i32> @llvm.vp.bitreverse.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
955 define <vscale x 1 x i32> @vp_bitreverse_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
956 ; CHECK-LABEL: vp_bitreverse_nxv1i32:
958 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
959 ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
960 ; CHECK-NEXT: lui a0, 16
961 ; CHECK-NEXT: addi a0, a0, -256
962 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
963 ; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t
964 ; CHECK-NEXT: vor.vv v9, v9, v10, v0.t
965 ; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
966 ; CHECK-NEXT: lui a0, 61681
967 ; CHECK-NEXT: addi a0, a0, -241
968 ; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t
969 ; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
970 ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
971 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
972 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
973 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
974 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
975 ; CHECK-NEXT: lui a0, 209715
976 ; CHECK-NEXT: addi a0, a0, 819
977 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
978 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
979 ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
980 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
981 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
982 ; CHECK-NEXT: lui a0, 349525
983 ; CHECK-NEXT: addi a0, a0, 1365
984 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
985 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
986 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
987 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
988 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
989 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
990 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
993 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32:
994 ; CHECK-ZVBB: # %bb.0:
995 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
996 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
997 ; CHECK-ZVBB-NEXT: ret
998 %v = call <vscale x 1 x i32> @llvm.vp.bitreverse.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl)
999 ret <vscale x 1 x i32> %v
1002 define <vscale x 1 x i32> @vp_bitreverse_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
1003 ; CHECK-LABEL: vp_bitreverse_nxv1i32_unmasked:
1005 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
1006 ; CHECK-NEXT: vsrl.vi v9, v8, 8
1007 ; CHECK-NEXT: lui a0, 16
1008 ; CHECK-NEXT: vsrl.vi v10, v8, 24
1009 ; CHECK-NEXT: addi a0, a0, -256
1010 ; CHECK-NEXT: vand.vx v9, v9, a0
1011 ; CHECK-NEXT: vor.vv v9, v9, v10
1012 ; CHECK-NEXT: vsll.vi v10, v8, 24
1013 ; CHECK-NEXT: vand.vx v8, v8, a0
1014 ; CHECK-NEXT: lui a0, 61681
1015 ; CHECK-NEXT: addi a0, a0, -241
1016 ; CHECK-NEXT: vsll.vi v8, v8, 8
1017 ; CHECK-NEXT: vor.vv v8, v10, v8
1018 ; CHECK-NEXT: vor.vv v8, v8, v9
1019 ; CHECK-NEXT: vsrl.vi v9, v8, 4
1020 ; CHECK-NEXT: vand.vx v8, v8, a0
1021 ; CHECK-NEXT: vand.vx v9, v9, a0
1022 ; CHECK-NEXT: lui a0, 209715
1023 ; CHECK-NEXT: addi a0, a0, 819
1024 ; CHECK-NEXT: vsll.vi v8, v8, 4
1025 ; CHECK-NEXT: vor.vv v8, v9, v8
1026 ; CHECK-NEXT: vsrl.vi v9, v8, 2
1027 ; CHECK-NEXT: vand.vx v8, v8, a0
1028 ; CHECK-NEXT: vand.vx v9, v9, a0
1029 ; CHECK-NEXT: lui a0, 349525
1030 ; CHECK-NEXT: addi a0, a0, 1365
1031 ; CHECK-NEXT: vsll.vi v8, v8, 2
1032 ; CHECK-NEXT: vor.vv v8, v9, v8
1033 ; CHECK-NEXT: vsrl.vi v9, v8, 1
1034 ; CHECK-NEXT: vand.vx v8, v8, a0
1035 ; CHECK-NEXT: vand.vx v9, v9, a0
1036 ; CHECK-NEXT: vadd.vv v8, v8, v8
1037 ; CHECK-NEXT: vor.vv v8, v9, v8
1040 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32_unmasked:
1041 ; CHECK-ZVBB: # %bb.0:
1042 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
1043 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
1044 ; CHECK-ZVBB-NEXT: ret
1045 %v = call <vscale x 1 x i32> @llvm.vp.bitreverse.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
1046 ret <vscale x 1 x i32> %v
1049 declare <vscale x 2 x i32> @llvm.vp.bitreverse.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
1051 define <vscale x 2 x i32> @vp_bitreverse_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1052 ; CHECK-LABEL: vp_bitreverse_nxv2i32:
1054 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1055 ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
1056 ; CHECK-NEXT: lui a0, 16
1057 ; CHECK-NEXT: addi a0, a0, -256
1058 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
1059 ; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t
1060 ; CHECK-NEXT: vor.vv v9, v9, v10, v0.t
1061 ; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
1062 ; CHECK-NEXT: lui a0, 61681
1063 ; CHECK-NEXT: addi a0, a0, -241
1064 ; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t
1065 ; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
1066 ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
1067 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
1068 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
1069 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
1070 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1071 ; CHECK-NEXT: lui a0, 209715
1072 ; CHECK-NEXT: addi a0, a0, 819
1073 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
1074 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
1075 ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
1076 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
1077 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1078 ; CHECK-NEXT: lui a0, 349525
1079 ; CHECK-NEXT: addi a0, a0, 1365
1080 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
1081 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
1082 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
1083 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
1084 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1085 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
1086 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
1089 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32:
1090 ; CHECK-ZVBB: # %bb.0:
1091 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1092 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
1093 ; CHECK-ZVBB-NEXT: ret
1094 %v = call <vscale x 2 x i32> @llvm.vp.bitreverse.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
1095 ret <vscale x 2 x i32> %v
1098 define <vscale x 2 x i32> @vp_bitreverse_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
1099 ; CHECK-LABEL: vp_bitreverse_nxv2i32_unmasked:
1101 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1102 ; CHECK-NEXT: vsrl.vi v9, v8, 8
1103 ; CHECK-NEXT: lui a0, 16
1104 ; CHECK-NEXT: vsrl.vi v10, v8, 24
1105 ; CHECK-NEXT: addi a0, a0, -256
1106 ; CHECK-NEXT: vand.vx v9, v9, a0
1107 ; CHECK-NEXT: vor.vv v9, v9, v10
1108 ; CHECK-NEXT: vsll.vi v10, v8, 24
1109 ; CHECK-NEXT: vand.vx v8, v8, a0
1110 ; CHECK-NEXT: lui a0, 61681
1111 ; CHECK-NEXT: addi a0, a0, -241
1112 ; CHECK-NEXT: vsll.vi v8, v8, 8
1113 ; CHECK-NEXT: vor.vv v8, v10, v8
1114 ; CHECK-NEXT: vor.vv v8, v8, v9
1115 ; CHECK-NEXT: vsrl.vi v9, v8, 4
1116 ; CHECK-NEXT: vand.vx v8, v8, a0
1117 ; CHECK-NEXT: vand.vx v9, v9, a0
1118 ; CHECK-NEXT: lui a0, 209715
1119 ; CHECK-NEXT: addi a0, a0, 819
1120 ; CHECK-NEXT: vsll.vi v8, v8, 4
1121 ; CHECK-NEXT: vor.vv v8, v9, v8
1122 ; CHECK-NEXT: vsrl.vi v9, v8, 2
1123 ; CHECK-NEXT: vand.vx v8, v8, a0
1124 ; CHECK-NEXT: vand.vx v9, v9, a0
1125 ; CHECK-NEXT: lui a0, 349525
1126 ; CHECK-NEXT: addi a0, a0, 1365
1127 ; CHECK-NEXT: vsll.vi v8, v8, 2
1128 ; CHECK-NEXT: vor.vv v8, v9, v8
1129 ; CHECK-NEXT: vsrl.vi v9, v8, 1
1130 ; CHECK-NEXT: vand.vx v8, v8, a0
1131 ; CHECK-NEXT: vand.vx v9, v9, a0
1132 ; CHECK-NEXT: vadd.vv v8, v8, v8
1133 ; CHECK-NEXT: vor.vv v8, v9, v8
1136 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32_unmasked:
1137 ; CHECK-ZVBB: # %bb.0:
1138 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1139 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
1140 ; CHECK-ZVBB-NEXT: ret
1141 %v = call <vscale x 2 x i32> @llvm.vp.bitreverse.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
1142 ret <vscale x 2 x i32> %v
1145 declare <vscale x 4 x i32> @llvm.vp.bitreverse.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
1147 define <vscale x 4 x i32> @vp_bitreverse_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1148 ; CHECK-LABEL: vp_bitreverse_nxv4i32:
1150 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1151 ; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t
1152 ; CHECK-NEXT: lui a0, 16
1153 ; CHECK-NEXT: addi a0, a0, -256
1154 ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
1155 ; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t
1156 ; CHECK-NEXT: vor.vv v10, v10, v12, v0.t
1157 ; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
1158 ; CHECK-NEXT: lui a0, 61681
1159 ; CHECK-NEXT: addi a0, a0, -241
1160 ; CHECK-NEXT: vsll.vi v12, v12, 8, v0.t
1161 ; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
1162 ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
1163 ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
1164 ; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
1165 ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
1166 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1167 ; CHECK-NEXT: lui a0, 209715
1168 ; CHECK-NEXT: addi a0, a0, 819
1169 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
1170 ; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
1171 ; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t
1172 ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
1173 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1174 ; CHECK-NEXT: lui a0, 349525
1175 ; CHECK-NEXT: addi a0, a0, 1365
1176 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
1177 ; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
1178 ; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
1179 ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
1180 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1181 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
1182 ; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
1185 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32:
1186 ; CHECK-ZVBB: # %bb.0:
1187 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1188 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
1189 ; CHECK-ZVBB-NEXT: ret
1190 %v = call <vscale x 4 x i32> @llvm.vp.bitreverse.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
1191 ret <vscale x 4 x i32> %v
1194 define <vscale x 4 x i32> @vp_bitreverse_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
1195 ; CHECK-LABEL: vp_bitreverse_nxv4i32_unmasked:
1197 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1198 ; CHECK-NEXT: vsrl.vi v10, v8, 8
1199 ; CHECK-NEXT: lui a0, 16
1200 ; CHECK-NEXT: vsrl.vi v12, v8, 24
1201 ; CHECK-NEXT: addi a0, a0, -256
1202 ; CHECK-NEXT: vand.vx v10, v10, a0
1203 ; CHECK-NEXT: vor.vv v10, v10, v12
1204 ; CHECK-NEXT: vsll.vi v12, v8, 24
1205 ; CHECK-NEXT: vand.vx v8, v8, a0
1206 ; CHECK-NEXT: lui a0, 61681
1207 ; CHECK-NEXT: addi a0, a0, -241
1208 ; CHECK-NEXT: vsll.vi v8, v8, 8
1209 ; CHECK-NEXT: vor.vv v8, v12, v8
1210 ; CHECK-NEXT: vor.vv v8, v8, v10
1211 ; CHECK-NEXT: vsrl.vi v10, v8, 4
1212 ; CHECK-NEXT: vand.vx v8, v8, a0
1213 ; CHECK-NEXT: vand.vx v10, v10, a0
1214 ; CHECK-NEXT: lui a0, 209715
1215 ; CHECK-NEXT: addi a0, a0, 819
1216 ; CHECK-NEXT: vsll.vi v8, v8, 4
1217 ; CHECK-NEXT: vor.vv v8, v10, v8
1218 ; CHECK-NEXT: vsrl.vi v10, v8, 2
1219 ; CHECK-NEXT: vand.vx v8, v8, a0
1220 ; CHECK-NEXT: vand.vx v10, v10, a0
1221 ; CHECK-NEXT: lui a0, 349525
1222 ; CHECK-NEXT: addi a0, a0, 1365
1223 ; CHECK-NEXT: vsll.vi v8, v8, 2
1224 ; CHECK-NEXT: vor.vv v8, v10, v8
1225 ; CHECK-NEXT: vsrl.vi v10, v8, 1
1226 ; CHECK-NEXT: vand.vx v8, v8, a0
1227 ; CHECK-NEXT: vand.vx v10, v10, a0
1228 ; CHECK-NEXT: vadd.vv v8, v8, v8
1229 ; CHECK-NEXT: vor.vv v8, v10, v8
1232 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32_unmasked:
1233 ; CHECK-ZVBB: # %bb.0:
1234 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1235 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
1236 ; CHECK-ZVBB-NEXT: ret
1237 %v = call <vscale x 4 x i32> @llvm.vp.bitreverse.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1238 ret <vscale x 4 x i32> %v
1241 declare <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
1243 define <vscale x 8 x i32> @vp_bitreverse_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1244 ; CHECK-LABEL: vp_bitreverse_nxv8i32:
1246 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
1247 ; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t
1248 ; CHECK-NEXT: lui a0, 16
1249 ; CHECK-NEXT: addi a0, a0, -256
1250 ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
1251 ; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t
1252 ; CHECK-NEXT: vor.vv v12, v12, v16, v0.t
1253 ; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
1254 ; CHECK-NEXT: lui a0, 61681
1255 ; CHECK-NEXT: addi a0, a0, -241
1256 ; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t
1257 ; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
1258 ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
1259 ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
1260 ; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
1261 ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
1262 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1263 ; CHECK-NEXT: lui a0, 209715
1264 ; CHECK-NEXT: addi a0, a0, 819
1265 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
1266 ; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
1267 ; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t
1268 ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
1269 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1270 ; CHECK-NEXT: lui a0, 349525
1271 ; CHECK-NEXT: addi a0, a0, 1365
1272 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
1273 ; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
1274 ; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
1275 ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
1276 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1277 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
1278 ; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
1281 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32:
1282 ; CHECK-ZVBB: # %bb.0:
1283 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma
1284 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
1285 ; CHECK-ZVBB-NEXT: ret
1286 %v = call <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
1287 ret <vscale x 8 x i32> %v
1290 define <vscale x 8 x i32> @vp_bitreverse_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
1291 ; CHECK-LABEL: vp_bitreverse_nxv8i32_unmasked:
1293 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
1294 ; CHECK-NEXT: vsrl.vi v12, v8, 8
1295 ; CHECK-NEXT: lui a0, 16
1296 ; CHECK-NEXT: vsrl.vi v16, v8, 24
1297 ; CHECK-NEXT: addi a0, a0, -256
1298 ; CHECK-NEXT: vand.vx v12, v12, a0
1299 ; CHECK-NEXT: vor.vv v12, v12, v16
1300 ; CHECK-NEXT: vsll.vi v16, v8, 24
1301 ; CHECK-NEXT: vand.vx v8, v8, a0
1302 ; CHECK-NEXT: lui a0, 61681
1303 ; CHECK-NEXT: addi a0, a0, -241
1304 ; CHECK-NEXT: vsll.vi v8, v8, 8
1305 ; CHECK-NEXT: vor.vv v8, v16, v8
1306 ; CHECK-NEXT: vor.vv v8, v8, v12
1307 ; CHECK-NEXT: vsrl.vi v12, v8, 4
1308 ; CHECK-NEXT: vand.vx v8, v8, a0
1309 ; CHECK-NEXT: vand.vx v12, v12, a0
1310 ; CHECK-NEXT: lui a0, 209715
1311 ; CHECK-NEXT: addi a0, a0, 819
1312 ; CHECK-NEXT: vsll.vi v8, v8, 4
1313 ; CHECK-NEXT: vor.vv v8, v12, v8
1314 ; CHECK-NEXT: vsrl.vi v12, v8, 2
1315 ; CHECK-NEXT: vand.vx v8, v8, a0
1316 ; CHECK-NEXT: vand.vx v12, v12, a0
1317 ; CHECK-NEXT: lui a0, 349525
1318 ; CHECK-NEXT: addi a0, a0, 1365
1319 ; CHECK-NEXT: vsll.vi v8, v8, 2
1320 ; CHECK-NEXT: vor.vv v8, v12, v8
1321 ; CHECK-NEXT: vsrl.vi v12, v8, 1
1322 ; CHECK-NEXT: vand.vx v8, v8, a0
1323 ; CHECK-NEXT: vand.vx v12, v12, a0
1324 ; CHECK-NEXT: vadd.vv v8, v8, v8
1325 ; CHECK-NEXT: vor.vv v8, v12, v8
1328 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32_unmasked:
1329 ; CHECK-ZVBB: # %bb.0:
1330 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma
1331 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
1332 ; CHECK-ZVBB-NEXT: ret
1333 %v = call <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
1334 ret <vscale x 8 x i32> %v
1337 declare <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
1339 define <vscale x 16 x i32> @vp_bitreverse_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1340 ; CHECK-LABEL: vp_bitreverse_nxv16i32:
1342 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1343 ; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
1344 ; CHECK-NEXT: lui a0, 16
1345 ; CHECK-NEXT: addi a0, a0, -256
1346 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
1347 ; CHECK-NEXT: vsrl.vi v24, v8, 24, v0.t
1348 ; CHECK-NEXT: vor.vv v16, v16, v24, v0.t
1349 ; CHECK-NEXT: vand.vx v24, v8, a0, v0.t
1350 ; CHECK-NEXT: lui a0, 61681
1351 ; CHECK-NEXT: addi a0, a0, -241
1352 ; CHECK-NEXT: vsll.vi v24, v24, 8, v0.t
1353 ; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
1354 ; CHECK-NEXT: vor.vv v8, v8, v24, v0.t
1355 ; CHECK-NEXT: vor.vv v16, v8, v16, v0.t
1356 ; CHECK-NEXT: vsrl.vi v8, v16, 4, v0.t
1357 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1358 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
1359 ; CHECK-NEXT: lui a0, 209715
1360 ; CHECK-NEXT: addi a0, a0, 819
1361 ; CHECK-NEXT: vsll.vi v16, v16, 4, v0.t
1362 ; CHECK-NEXT: vor.vv v16, v8, v16, v0.t
1363 ; CHECK-NEXT: vsrl.vi v8, v16, 2, v0.t
1364 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1365 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
1366 ; CHECK-NEXT: lui a0, 349525
1367 ; CHECK-NEXT: addi a0, a0, 1365
1368 ; CHECK-NEXT: vsll.vi v16, v16, 2, v0.t
1369 ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
1370 ; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
1371 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
1372 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
1373 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
1374 ; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
1377 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32:
1378 ; CHECK-ZVBB: # %bb.0:
1379 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1380 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
1381 ; CHECK-ZVBB-NEXT: ret
1382 %v = call <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
1383 ret <vscale x 16 x i32> %v
1386 define <vscale x 16 x i32> @vp_bitreverse_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
1387 ; CHECK-LABEL: vp_bitreverse_nxv16i32_unmasked:
1389 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1390 ; CHECK-NEXT: vsrl.vi v16, v8, 8
1391 ; CHECK-NEXT: lui a0, 16
1392 ; CHECK-NEXT: vsrl.vi v24, v8, 24
1393 ; CHECK-NEXT: addi a0, a0, -256
1394 ; CHECK-NEXT: vand.vx v16, v16, a0
1395 ; CHECK-NEXT: vor.vv v16, v16, v24
1396 ; CHECK-NEXT: vsll.vi v24, v8, 24
1397 ; CHECK-NEXT: vand.vx v8, v8, a0
1398 ; CHECK-NEXT: lui a0, 61681
1399 ; CHECK-NEXT: addi a0, a0, -241
1400 ; CHECK-NEXT: vsll.vi v8, v8, 8
1401 ; CHECK-NEXT: vor.vv v8, v24, v8
1402 ; CHECK-NEXT: vor.vv v8, v8, v16
1403 ; CHECK-NEXT: vsrl.vi v16, v8, 4
1404 ; CHECK-NEXT: vand.vx v8, v8, a0
1405 ; CHECK-NEXT: vand.vx v16, v16, a0
1406 ; CHECK-NEXT: lui a0, 209715
1407 ; CHECK-NEXT: addi a0, a0, 819
1408 ; CHECK-NEXT: vsll.vi v8, v8, 4
1409 ; CHECK-NEXT: vor.vv v8, v16, v8
1410 ; CHECK-NEXT: vsrl.vi v16, v8, 2
1411 ; CHECK-NEXT: vand.vx v8, v8, a0
1412 ; CHECK-NEXT: vand.vx v16, v16, a0
1413 ; CHECK-NEXT: lui a0, 349525
1414 ; CHECK-NEXT: addi a0, a0, 1365
1415 ; CHECK-NEXT: vsll.vi v8, v8, 2
1416 ; CHECK-NEXT: vor.vv v8, v16, v8
1417 ; CHECK-NEXT: vsrl.vi v16, v8, 1
1418 ; CHECK-NEXT: vand.vx v8, v8, a0
1419 ; CHECK-NEXT: vand.vx v16, v16, a0
1420 ; CHECK-NEXT: vadd.vv v8, v8, v8
1421 ; CHECK-NEXT: vor.vv v8, v16, v8
1424 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32_unmasked:
1425 ; CHECK-ZVBB: # %bb.0:
1426 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1427 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
1428 ; CHECK-ZVBB-NEXT: ret
1429 %v = call <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
1430 ret <vscale x 16 x i32> %v
1433 declare <vscale x 1 x i64> @llvm.vp.bitreverse.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
1435 define <vscale x 1 x i64> @vp_bitreverse_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1436 ; RV32-LABEL: vp_bitreverse_nxv1i64:
1438 ; RV32-NEXT: addi sp, sp, -16
1439 ; RV32-NEXT: .cfi_def_cfa_offset 16
1440 ; RV32-NEXT: lui a4, 1044480
1441 ; RV32-NEXT: li a3, 56
1442 ; RV32-NEXT: lui a5, 16
1443 ; RV32-NEXT: li a2, 40
1444 ; RV32-NEXT: lui a1, 4080
1445 ; RV32-NEXT: addi a6, sp, 8
1446 ; RV32-NEXT: sw a4, 8(sp)
1447 ; RV32-NEXT: sw zero, 12(sp)
1448 ; RV32-NEXT: vsetvli a4, zero, e64, m1, ta, ma
1449 ; RV32-NEXT: vlse64.v v9, (a6), zero
1450 ; RV32-NEXT: lui a4, 61681
1451 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1452 ; RV32-NEXT: vsll.vx v10, v8, a3, v0.t
1453 ; RV32-NEXT: addi a5, a5, -256
1454 ; RV32-NEXT: vand.vx v11, v8, a5, v0.t
1455 ; RV32-NEXT: vsll.vx v11, v11, a2, v0.t
1456 ; RV32-NEXT: vor.vv v10, v10, v11, v0.t
1457 ; RV32-NEXT: vand.vx v11, v8, a1, v0.t
1458 ; RV32-NEXT: vsll.vi v11, v11, 24, v0.t
1459 ; RV32-NEXT: vand.vv v12, v8, v9, v0.t
1460 ; RV32-NEXT: vsll.vi v12, v12, 8, v0.t
1461 ; RV32-NEXT: vor.vv v11, v11, v12, v0.t
1462 ; RV32-NEXT: vor.vv v10, v10, v11, v0.t
1463 ; RV32-NEXT: vsrl.vx v11, v8, a3, v0.t
1464 ; RV32-NEXT: lui a3, 209715
1465 ; RV32-NEXT: vsrl.vx v12, v8, a2, v0.t
1466 ; RV32-NEXT: lui a2, 349525
1467 ; RV32-NEXT: addi a4, a4, -241
1468 ; RV32-NEXT: addi a3, a3, 819
1469 ; RV32-NEXT: addi a2, a2, 1365
1470 ; RV32-NEXT: vand.vx v12, v12, a5, v0.t
1471 ; RV32-NEXT: vor.vv v11, v12, v11, v0.t
1472 ; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t
1473 ; RV32-NEXT: vand.vx v12, v12, a1, v0.t
1474 ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
1475 ; RV32-NEXT: vand.vv v8, v8, v9, v0.t
1476 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
1477 ; RV32-NEXT: vmv.v.x v9, a4
1478 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1479 ; RV32-NEXT: vor.vv v8, v8, v12, v0.t
1480 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
1481 ; RV32-NEXT: vmv.v.x v12, a3
1482 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1483 ; RV32-NEXT: vor.vv v8, v8, v11, v0.t
1484 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
1485 ; RV32-NEXT: vmv.v.x v11, a2
1486 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1487 ; RV32-NEXT: vor.vv v8, v10, v8, v0.t
1488 ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
1489 ; RV32-NEXT: vand.vv v10, v10, v9, v0.t
1490 ; RV32-NEXT: vand.vv v8, v8, v9, v0.t
1491 ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
1492 ; RV32-NEXT: vor.vv v8, v10, v8, v0.t
1493 ; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
1494 ; RV32-NEXT: vand.vv v9, v9, v12, v0.t
1495 ; RV32-NEXT: vand.vv v8, v8, v12, v0.t
1496 ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
1497 ; RV32-NEXT: vor.vv v8, v9, v8, v0.t
1498 ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
1499 ; RV32-NEXT: vand.vv v9, v9, v11, v0.t
1500 ; RV32-NEXT: vand.vv v8, v8, v11, v0.t
1501 ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
1502 ; RV32-NEXT: vor.vv v8, v9, v8, v0.t
1503 ; RV32-NEXT: addi sp, sp, 16
1504 ; RV32-NEXT: .cfi_def_cfa_offset 0
1507 ; RV64-LABEL: vp_bitreverse_nxv1i64:
1509 ; RV64-NEXT: lui a1, 4080
1510 ; RV64-NEXT: li a3, 255
1511 ; RV64-NEXT: li a2, 56
1512 ; RV64-NEXT: lui a4, 16
1513 ; RV64-NEXT: lui a5, 61681
1514 ; RV64-NEXT: lui a6, 209715
1515 ; RV64-NEXT: lui a7, 349525
1516 ; RV64-NEXT: addiw a5, a5, -241
1517 ; RV64-NEXT: addiw a6, a6, 819
1518 ; RV64-NEXT: addiw a7, a7, 1365
1519 ; RV64-NEXT: slli t0, a5, 32
1520 ; RV64-NEXT: add t0, a5, t0
1521 ; RV64-NEXT: slli a5, a6, 32
1522 ; RV64-NEXT: add a6, a6, a5
1523 ; RV64-NEXT: slli a5, a7, 32
1524 ; RV64-NEXT: add a5, a7, a5
1525 ; RV64-NEXT: li a7, 40
1526 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1527 ; RV64-NEXT: vand.vx v9, v8, a1, v0.t
1528 ; RV64-NEXT: slli a3, a3, 24
1529 ; RV64-NEXT: addiw a0, a4, -256
1530 ; RV64-NEXT: vsll.vi v9, v9, 24, v0.t
1531 ; RV64-NEXT: vand.vx v10, v8, a3, v0.t
1532 ; RV64-NEXT: vsll.vi v10, v10, 8, v0.t
1533 ; RV64-NEXT: vor.vv v9, v9, v10, v0.t
1534 ; RV64-NEXT: vsll.vx v10, v8, a2, v0.t
1535 ; RV64-NEXT: vand.vx v11, v8, a0, v0.t
1536 ; RV64-NEXT: vsll.vx v11, v11, a7, v0.t
1537 ; RV64-NEXT: vor.vv v10, v10, v11, v0.t
1538 ; RV64-NEXT: vor.vv v9, v10, v9, v0.t
1539 ; RV64-NEXT: vsrl.vx v10, v8, a2, v0.t
1540 ; RV64-NEXT: vsrl.vx v11, v8, a7, v0.t
1541 ; RV64-NEXT: vand.vx v11, v11, a0, v0.t
1542 ; RV64-NEXT: vor.vv v10, v11, v10, v0.t
1543 ; RV64-NEXT: vsrl.vi v11, v8, 24, v0.t
1544 ; RV64-NEXT: vand.vx v11, v11, a1, v0.t
1545 ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
1546 ; RV64-NEXT: vand.vx v8, v8, a3, v0.t
1547 ; RV64-NEXT: vor.vv v8, v8, v11, v0.t
1548 ; RV64-NEXT: vor.vv v8, v8, v10, v0.t
1549 ; RV64-NEXT: vor.vv v8, v9, v8, v0.t
1550 ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
1551 ; RV64-NEXT: vand.vx v9, v9, t0, v0.t
1552 ; RV64-NEXT: vand.vx v8, v8, t0, v0.t
1553 ; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
1554 ; RV64-NEXT: vor.vv v8, v9, v8, v0.t
1555 ; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
1556 ; RV64-NEXT: vand.vx v9, v9, a6, v0.t
1557 ; RV64-NEXT: vand.vx v8, v8, a6, v0.t
1558 ; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
1559 ; RV64-NEXT: vor.vv v8, v9, v8, v0.t
1560 ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
1561 ; RV64-NEXT: vand.vx v9, v9, a5, v0.t
1562 ; RV64-NEXT: vand.vx v8, v8, a5, v0.t
1563 ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
1564 ; RV64-NEXT: vor.vv v8, v9, v8, v0.t
1567 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i64:
1568 ; CHECK-ZVBB: # %bb.0:
1569 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1570 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
1571 ; CHECK-ZVBB-NEXT: ret
1572 %v = call <vscale x 1 x i64> @llvm.vp.bitreverse.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl)
1573 ret <vscale x 1 x i64> %v
1576 define <vscale x 1 x i64> @vp_bitreverse_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
1577 ; RV32-LABEL: vp_bitreverse_nxv1i64_unmasked:
1579 ; RV32-NEXT: addi sp, sp, -16
1580 ; RV32-NEXT: .cfi_def_cfa_offset 16
1581 ; RV32-NEXT: lui a1, 1044480
1582 ; RV32-NEXT: li a2, 56
1583 ; RV32-NEXT: lui a3, 16
1584 ; RV32-NEXT: li a4, 40
1585 ; RV32-NEXT: lui a5, 4080
1586 ; RV32-NEXT: addi a6, sp, 8
1587 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1588 ; RV32-NEXT: vsrl.vi v9, v8, 24
1589 ; RV32-NEXT: sw a1, 8(sp)
1590 ; RV32-NEXT: sw zero, 12(sp)
1591 ; RV32-NEXT: vsll.vx v10, v8, a2
1592 ; RV32-NEXT: addi a1, a3, -256
1593 ; RV32-NEXT: vsrl.vx v11, v8, a2
1594 ; RV32-NEXT: vsrl.vx v12, v8, a4
1595 ; RV32-NEXT: vand.vx v13, v8, a1
1596 ; RV32-NEXT: vand.vx v12, v12, a1
1597 ; RV32-NEXT: vor.vv v11, v12, v11
1598 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
1599 ; RV32-NEXT: vlse64.v v12, (a6), zero
1600 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1601 ; RV32-NEXT: vsll.vx v13, v13, a4
1602 ; RV32-NEXT: vor.vv v10, v10, v13
1603 ; RV32-NEXT: vsrl.vi v13, v8, 8
1604 ; RV32-NEXT: vand.vx v9, v9, a5
1605 ; RV32-NEXT: vand.vv v13, v13, v12
1606 ; RV32-NEXT: vor.vv v9, v13, v9
1607 ; RV32-NEXT: lui a1, 61681
1608 ; RV32-NEXT: lui a2, 209715
1609 ; RV32-NEXT: lui a3, 349525
1610 ; RV32-NEXT: vand.vv v12, v8, v12
1611 ; RV32-NEXT: vand.vx v8, v8, a5
1612 ; RV32-NEXT: addi a1, a1, -241
1613 ; RV32-NEXT: addi a2, a2, 819
1614 ; RV32-NEXT: addi a3, a3, 1365
1615 ; RV32-NEXT: vsll.vi v8, v8, 24
1616 ; RV32-NEXT: vsll.vi v12, v12, 8
1617 ; RV32-NEXT: vor.vv v8, v8, v12
1618 ; RV32-NEXT: vsetvli a4, zero, e32, m1, ta, ma
1619 ; RV32-NEXT: vmv.v.x v12, a1
1620 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1621 ; RV32-NEXT: vor.vv v9, v9, v11
1622 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
1623 ; RV32-NEXT: vmv.v.x v11, a2
1624 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1625 ; RV32-NEXT: vor.vv v8, v10, v8
1626 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
1627 ; RV32-NEXT: vmv.v.x v10, a3
1628 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1629 ; RV32-NEXT: vor.vv v8, v8, v9
1630 ; RV32-NEXT: vsrl.vi v9, v8, 4
1631 ; RV32-NEXT: vand.vv v8, v8, v12
1632 ; RV32-NEXT: vand.vv v9, v9, v12
1633 ; RV32-NEXT: vsll.vi v8, v8, 4
1634 ; RV32-NEXT: vor.vv v8, v9, v8
1635 ; RV32-NEXT: vsrl.vi v9, v8, 2
1636 ; RV32-NEXT: vand.vv v8, v8, v11
1637 ; RV32-NEXT: vand.vv v9, v9, v11
1638 ; RV32-NEXT: vsll.vi v8, v8, 2
1639 ; RV32-NEXT: vor.vv v8, v9, v8
1640 ; RV32-NEXT: vsrl.vi v9, v8, 1
1641 ; RV32-NEXT: vand.vv v8, v8, v10
1642 ; RV32-NEXT: vand.vv v9, v9, v10
1643 ; RV32-NEXT: vadd.vv v8, v8, v8
1644 ; RV32-NEXT: vor.vv v8, v9, v8
1645 ; RV32-NEXT: addi sp, sp, 16
1646 ; RV32-NEXT: .cfi_def_cfa_offset 0
1649 ; RV64-LABEL: vp_bitreverse_nxv1i64_unmasked:
1651 ; RV64-NEXT: lui a1, 4080
1652 ; RV64-NEXT: li a2, 255
1653 ; RV64-NEXT: li a3, 56
1654 ; RV64-NEXT: lui a4, 16
1655 ; RV64-NEXT: li a5, 40
1656 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1657 ; RV64-NEXT: vsrl.vi v9, v8, 24
1658 ; RV64-NEXT: vsrl.vi v10, v8, 8
1659 ; RV64-NEXT: addiw a0, a4, -256
1660 ; RV64-NEXT: vsrl.vx v11, v8, a3
1661 ; RV64-NEXT: vsrl.vx v12, v8, a5
1662 ; RV64-NEXT: vand.vx v12, v12, a0
1663 ; RV64-NEXT: vor.vv v11, v12, v11
1664 ; RV64-NEXT: vand.vx v12, v8, a1
1665 ; RV64-NEXT: slli a2, a2, 24
1666 ; RV64-NEXT: vand.vx v9, v9, a1
1667 ; RV64-NEXT: vsll.vi v12, v12, 24
1668 ; RV64-NEXT: vand.vx v10, v10, a2
1669 ; RV64-NEXT: vor.vv v9, v10, v9
1670 ; RV64-NEXT: vand.vx v10, v8, a2
1671 ; RV64-NEXT: vsll.vi v10, v10, 8
1672 ; RV64-NEXT: vor.vv v10, v12, v10
1673 ; RV64-NEXT: vsll.vx v12, v8, a3
1674 ; RV64-NEXT: vand.vx v8, v8, a0
1675 ; RV64-NEXT: vsll.vx v8, v8, a5
1676 ; RV64-NEXT: vor.vv v8, v12, v8
1677 ; RV64-NEXT: lui a0, 61681
1678 ; RV64-NEXT: lui a1, 209715
1679 ; RV64-NEXT: lui a2, 349525
1680 ; RV64-NEXT: addiw a0, a0, -241
1681 ; RV64-NEXT: addiw a1, a1, 819
1682 ; RV64-NEXT: addiw a2, a2, 1365
1683 ; RV64-NEXT: slli a3, a0, 32
1684 ; RV64-NEXT: slli a4, a1, 32
1685 ; RV64-NEXT: add a0, a0, a3
1686 ; RV64-NEXT: slli a3, a2, 32
1687 ; RV64-NEXT: add a1, a1, a4
1688 ; RV64-NEXT: add a2, a2, a3
1689 ; RV64-NEXT: vor.vv v9, v9, v11
1690 ; RV64-NEXT: vor.vv v8, v8, v10
1691 ; RV64-NEXT: vor.vv v8, v8, v9
1692 ; RV64-NEXT: vsrl.vi v9, v8, 4
1693 ; RV64-NEXT: vand.vx v8, v8, a0
1694 ; RV64-NEXT: vand.vx v9, v9, a0
1695 ; RV64-NEXT: vsll.vi v8, v8, 4
1696 ; RV64-NEXT: vor.vv v8, v9, v8
1697 ; RV64-NEXT: vsrl.vi v9, v8, 2
1698 ; RV64-NEXT: vand.vx v8, v8, a1
1699 ; RV64-NEXT: vand.vx v9, v9, a1
1700 ; RV64-NEXT: vsll.vi v8, v8, 2
1701 ; RV64-NEXT: vor.vv v8, v9, v8
1702 ; RV64-NEXT: vsrl.vi v9, v8, 1
1703 ; RV64-NEXT: vand.vx v8, v8, a2
1704 ; RV64-NEXT: vand.vx v9, v9, a2
1705 ; RV64-NEXT: vadd.vv v8, v8, v8
1706 ; RV64-NEXT: vor.vv v8, v9, v8
1709 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i64_unmasked:
1710 ; CHECK-ZVBB: # %bb.0:
1711 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1712 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
1713 ; CHECK-ZVBB-NEXT: ret
1714 %v = call <vscale x 1 x i64> @llvm.vp.bitreverse.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
1715 ret <vscale x 1 x i64> %v
1718 declare <vscale x 2 x i64> @llvm.vp.bitreverse.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
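; From nxv2i64 upwards the expansion is unchanged apart from the register
; group size (m2, then m4, then m8). The unmasked variants finish the final
; 1-bit swap with vadd.vv v8, v8, v8 instead of the masked
; vsll.vi v8, v8, 1, v0.t used in the masked versions.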
1720 define <vscale x 2 x i64> @vp_bitreverse_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1721 ; RV32-LABEL: vp_bitreverse_nxv2i64:
1723 ; RV32-NEXT: addi sp, sp, -16
1724 ; RV32-NEXT: .cfi_def_cfa_offset 16
1725 ; RV32-NEXT: lui a4, 1044480
1726 ; RV32-NEXT: li a3, 56
1727 ; RV32-NEXT: lui a5, 16
1728 ; RV32-NEXT: li a2, 40
1729 ; RV32-NEXT: lui a1, 4080
1730 ; RV32-NEXT: addi a6, sp, 8
1731 ; RV32-NEXT: sw a4, 8(sp)
1732 ; RV32-NEXT: sw zero, 12(sp)
1733 ; RV32-NEXT: vsetvli a4, zero, e64, m2, ta, ma
1734 ; RV32-NEXT: vlse64.v v10, (a6), zero
1735 ; RV32-NEXT: lui a4, 61681
1736 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1737 ; RV32-NEXT: vsll.vx v12, v8, a3, v0.t
1738 ; RV32-NEXT: addi a5, a5, -256
1739 ; RV32-NEXT: vand.vx v14, v8, a5, v0.t
1740 ; RV32-NEXT: vsll.vx v14, v14, a2, v0.t
1741 ; RV32-NEXT: vor.vv v12, v12, v14, v0.t
1742 ; RV32-NEXT: vand.vx v14, v8, a1, v0.t
1743 ; RV32-NEXT: vsll.vi v14, v14, 24, v0.t
1744 ; RV32-NEXT: vand.vv v16, v8, v10, v0.t
1745 ; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
1746 ; RV32-NEXT: vor.vv v14, v14, v16, v0.t
1747 ; RV32-NEXT: vor.vv v12, v12, v14, v0.t
1748 ; RV32-NEXT: vsrl.vx v14, v8, a3, v0.t
1749 ; RV32-NEXT: lui a3, 209715
1750 ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t
1751 ; RV32-NEXT: lui a2, 349525
1752 ; RV32-NEXT: addi a4, a4, -241
1753 ; RV32-NEXT: addi a3, a3, 819
1754 ; RV32-NEXT: addi a2, a2, 1365
1755 ; RV32-NEXT: vand.vx v16, v16, a5, v0.t
1756 ; RV32-NEXT: vor.vv v14, v16, v14, v0.t
1757 ; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t
1758 ; RV32-NEXT: vand.vx v16, v16, a1, v0.t
1759 ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
1760 ; RV32-NEXT: vand.vv v8, v8, v10, v0.t
1761 ; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
1762 ; RV32-NEXT: vmv.v.x v10, a4
1763 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1764 ; RV32-NEXT: vor.vv v8, v8, v16, v0.t
1765 ; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
1766 ; RV32-NEXT: vmv.v.x v16, a3
1767 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1768 ; RV32-NEXT: vor.vv v8, v8, v14, v0.t
1769 ; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
1770 ; RV32-NEXT: vmv.v.x v14, a2
1771 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1772 ; RV32-NEXT: vor.vv v8, v12, v8, v0.t
1773 ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
1774 ; RV32-NEXT: vand.vv v12, v12, v10, v0.t
1775 ; RV32-NEXT: vand.vv v8, v8, v10, v0.t
1776 ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
1777 ; RV32-NEXT: vor.vv v8, v12, v8, v0.t
1778 ; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
1779 ; RV32-NEXT: vand.vv v10, v10, v16, v0.t
1780 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
1781 ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
1782 ; RV32-NEXT: vor.vv v8, v10, v8, v0.t
1783 ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
1784 ; RV32-NEXT: vand.vv v10, v10, v14, v0.t
1785 ; RV32-NEXT: vand.vv v8, v8, v14, v0.t
1786 ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
1787 ; RV32-NEXT: vor.vv v8, v10, v8, v0.t
1788 ; RV32-NEXT: addi sp, sp, 16
1789 ; RV32-NEXT: .cfi_def_cfa_offset 0
1792 ; RV64-LABEL: vp_bitreverse_nxv2i64:
1794 ; RV64-NEXT: lui a1, 4080
1795 ; RV64-NEXT: li a3, 255
1796 ; RV64-NEXT: li a2, 56
1797 ; RV64-NEXT: lui a4, 16
1798 ; RV64-NEXT: lui a5, 61681
1799 ; RV64-NEXT: lui a6, 209715
1800 ; RV64-NEXT: lui a7, 349525
1801 ; RV64-NEXT: addiw a5, a5, -241
1802 ; RV64-NEXT: addiw a6, a6, 819
1803 ; RV64-NEXT: addiw a7, a7, 1365
1804 ; RV64-NEXT: slli t0, a5, 32
1805 ; RV64-NEXT: add t0, a5, t0
1806 ; RV64-NEXT: slli a5, a6, 32
1807 ; RV64-NEXT: add a6, a6, a5
1808 ; RV64-NEXT: slli a5, a7, 32
1809 ; RV64-NEXT: add a5, a7, a5
1810 ; RV64-NEXT: li a7, 40
1811 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1812 ; RV64-NEXT: vand.vx v10, v8, a1, v0.t
1813 ; RV64-NEXT: slli a3, a3, 24
1814 ; RV64-NEXT: addiw a0, a4, -256
1815 ; RV64-NEXT: vsll.vi v10, v10, 24, v0.t
1816 ; RV64-NEXT: vand.vx v12, v8, a3, v0.t
1817 ; RV64-NEXT: vsll.vi v12, v12, 8, v0.t
1818 ; RV64-NEXT: vor.vv v10, v10, v12, v0.t
1819 ; RV64-NEXT: vsll.vx v12, v8, a2, v0.t
1820 ; RV64-NEXT: vand.vx v14, v8, a0, v0.t
1821 ; RV64-NEXT: vsll.vx v14, v14, a7, v0.t
1822 ; RV64-NEXT: vor.vv v12, v12, v14, v0.t
1823 ; RV64-NEXT: vor.vv v10, v12, v10, v0.t
1824 ; RV64-NEXT: vsrl.vx v12, v8, a2, v0.t
1825 ; RV64-NEXT: vsrl.vx v14, v8, a7, v0.t
1826 ; RV64-NEXT: vand.vx v14, v14, a0, v0.t
1827 ; RV64-NEXT: vor.vv v12, v14, v12, v0.t
1828 ; RV64-NEXT: vsrl.vi v14, v8, 24, v0.t
1829 ; RV64-NEXT: vand.vx v14, v14, a1, v0.t
1830 ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
1831 ; RV64-NEXT: vand.vx v8, v8, a3, v0.t
1832 ; RV64-NEXT: vor.vv v8, v8, v14, v0.t
1833 ; RV64-NEXT: vor.vv v8, v8, v12, v0.t
1834 ; RV64-NEXT: vor.vv v8, v10, v8, v0.t
1835 ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
1836 ; RV64-NEXT: vand.vx v10, v10, t0, v0.t
1837 ; RV64-NEXT: vand.vx v8, v8, t0, v0.t
1838 ; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
1839 ; RV64-NEXT: vor.vv v8, v10, v8, v0.t
1840 ; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
1841 ; RV64-NEXT: vand.vx v10, v10, a6, v0.t
1842 ; RV64-NEXT: vand.vx v8, v8, a6, v0.t
1843 ; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
1844 ; RV64-NEXT: vor.vv v8, v10, v8, v0.t
1845 ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
1846 ; RV64-NEXT: vand.vx v10, v10, a5, v0.t
1847 ; RV64-NEXT: vand.vx v8, v8, a5, v0.t
1848 ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
1849 ; RV64-NEXT: vor.vv v8, v10, v8, v0.t
1852 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i64:
1853 ; CHECK-ZVBB: # %bb.0:
1854 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1855 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
1856 ; CHECK-ZVBB-NEXT: ret
1857 %v = call <vscale x 2 x i64> @llvm.vp.bitreverse.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
1858 ret <vscale x 2 x i64> %v
1861 define <vscale x 2 x i64> @vp_bitreverse_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
1862 ; RV32-LABEL: vp_bitreverse_nxv2i64_unmasked:
1864 ; RV32-NEXT: addi sp, sp, -16
1865 ; RV32-NEXT: .cfi_def_cfa_offset 16
1866 ; RV32-NEXT: lui a1, 1044480
1867 ; RV32-NEXT: li a2, 56
1868 ; RV32-NEXT: lui a3, 16
1869 ; RV32-NEXT: li a4, 40
1870 ; RV32-NEXT: lui a5, 4080
1871 ; RV32-NEXT: addi a6, sp, 8
1872 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1873 ; RV32-NEXT: vsrl.vi v14, v8, 24
1874 ; RV32-NEXT: sw a1, 8(sp)
1875 ; RV32-NEXT: sw zero, 12(sp)
1876 ; RV32-NEXT: vsll.vx v12, v8, a2
1877 ; RV32-NEXT: addi a1, a3, -256
1878 ; RV32-NEXT: vsrl.vx v10, v8, a2
1879 ; RV32-NEXT: vsrl.vx v16, v8, a4
1880 ; RV32-NEXT: vand.vx v18, v8, a1
1881 ; RV32-NEXT: vand.vx v16, v16, a1
1882 ; RV32-NEXT: vor.vv v10, v16, v10
1883 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
1884 ; RV32-NEXT: vlse64.v v16, (a6), zero
1885 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1886 ; RV32-NEXT: vsll.vx v18, v18, a4
1887 ; RV32-NEXT: vor.vv v12, v12, v18
1888 ; RV32-NEXT: vsrl.vi v18, v8, 8
1889 ; RV32-NEXT: vand.vx v14, v14, a5
1890 ; RV32-NEXT: vand.vv v18, v18, v16
1891 ; RV32-NEXT: vor.vv v14, v18, v14
1892 ; RV32-NEXT: lui a1, 61681
1893 ; RV32-NEXT: lui a2, 209715
1894 ; RV32-NEXT: lui a3, 349525
1895 ; RV32-NEXT: vand.vv v16, v8, v16
1896 ; RV32-NEXT: vand.vx v8, v8, a5
1897 ; RV32-NEXT: addi a1, a1, -241
1898 ; RV32-NEXT: addi a2, a2, 819
1899 ; RV32-NEXT: addi a3, a3, 1365
1900 ; RV32-NEXT: vsll.vi v8, v8, 24
1901 ; RV32-NEXT: vsll.vi v16, v16, 8
1902 ; RV32-NEXT: vor.vv v8, v8, v16
1903 ; RV32-NEXT: vsetvli a4, zero, e32, m2, ta, ma
1904 ; RV32-NEXT: vmv.v.x v16, a1
1905 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1906 ; RV32-NEXT: vor.vv v10, v14, v10
1907 ; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
1908 ; RV32-NEXT: vmv.v.x v14, a2
1909 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1910 ; RV32-NEXT: vor.vv v8, v12, v8
1911 ; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
1912 ; RV32-NEXT: vmv.v.x v12, a3
1913 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1914 ; RV32-NEXT: vor.vv v8, v8, v10
1915 ; RV32-NEXT: vsrl.vi v10, v8, 4
1916 ; RV32-NEXT: vand.vv v8, v8, v16
1917 ; RV32-NEXT: vand.vv v10, v10, v16
1918 ; RV32-NEXT: vsll.vi v8, v8, 4
1919 ; RV32-NEXT: vor.vv v8, v10, v8
1920 ; RV32-NEXT: vsrl.vi v10, v8, 2
1921 ; RV32-NEXT: vand.vv v8, v8, v14
1922 ; RV32-NEXT: vand.vv v10, v10, v14
1923 ; RV32-NEXT: vsll.vi v8, v8, 2
1924 ; RV32-NEXT: vor.vv v8, v10, v8
1925 ; RV32-NEXT: vsrl.vi v10, v8, 1
1926 ; RV32-NEXT: vand.vv v8, v8, v12
1927 ; RV32-NEXT: vand.vv v10, v10, v12
1928 ; RV32-NEXT: vadd.vv v8, v8, v8
1929 ; RV32-NEXT: vor.vv v8, v10, v8
1930 ; RV32-NEXT: addi sp, sp, 16
1931 ; RV32-NEXT: .cfi_def_cfa_offset 0
1934 ; RV64-LABEL: vp_bitreverse_nxv2i64_unmasked:
1936 ; RV64-NEXT: lui a1, 4080
1937 ; RV64-NEXT: li a2, 255
1938 ; RV64-NEXT: li a3, 56
1939 ; RV64-NEXT: lui a4, 16
1940 ; RV64-NEXT: li a5, 40
1941 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1942 ; RV64-NEXT: vsrl.vi v12, v8, 24
1943 ; RV64-NEXT: vsrl.vi v14, v8, 8
1944 ; RV64-NEXT: addiw a0, a4, -256
1945 ; RV64-NEXT: vsrl.vx v10, v8, a3
1946 ; RV64-NEXT: vsrl.vx v16, v8, a5
1947 ; RV64-NEXT: vand.vx v16, v16, a0
1948 ; RV64-NEXT: vor.vv v10, v16, v10
1949 ; RV64-NEXT: vand.vx v16, v8, a1
1950 ; RV64-NEXT: slli a2, a2, 24
1951 ; RV64-NEXT: vand.vx v12, v12, a1
1952 ; RV64-NEXT: vsll.vi v16, v16, 24
1953 ; RV64-NEXT: vand.vx v14, v14, a2
1954 ; RV64-NEXT: vor.vv v12, v14, v12
1955 ; RV64-NEXT: vand.vx v14, v8, a2
1956 ; RV64-NEXT: vsll.vi v14, v14, 8
1957 ; RV64-NEXT: vor.vv v14, v16, v14
1958 ; RV64-NEXT: vsll.vx v16, v8, a3
1959 ; RV64-NEXT: vand.vx v8, v8, a0
1960 ; RV64-NEXT: vsll.vx v8, v8, a5
1961 ; RV64-NEXT: vor.vv v8, v16, v8
1962 ; RV64-NEXT: lui a0, 61681
1963 ; RV64-NEXT: lui a1, 209715
1964 ; RV64-NEXT: lui a2, 349525
1965 ; RV64-NEXT: addiw a0, a0, -241
1966 ; RV64-NEXT: addiw a1, a1, 819
1967 ; RV64-NEXT: addiw a2, a2, 1365
1968 ; RV64-NEXT: slli a3, a0, 32
1969 ; RV64-NEXT: slli a4, a1, 32
1970 ; RV64-NEXT: add a0, a0, a3
1971 ; RV64-NEXT: slli a3, a2, 32
1972 ; RV64-NEXT: add a1, a1, a4
1973 ; RV64-NEXT: add a2, a2, a3
1974 ; RV64-NEXT: vor.vv v10, v12, v10
1975 ; RV64-NEXT: vor.vv v8, v8, v14
1976 ; RV64-NEXT: vor.vv v8, v8, v10
1977 ; RV64-NEXT: vsrl.vi v10, v8, 4
1978 ; RV64-NEXT: vand.vx v8, v8, a0
1979 ; RV64-NEXT: vand.vx v10, v10, a0
1980 ; RV64-NEXT: vsll.vi v8, v8, 4
1981 ; RV64-NEXT: vor.vv v8, v10, v8
1982 ; RV64-NEXT: vsrl.vi v10, v8, 2
1983 ; RV64-NEXT: vand.vx v8, v8, a1
1984 ; RV64-NEXT: vand.vx v10, v10, a1
1985 ; RV64-NEXT: vsll.vi v8, v8, 2
1986 ; RV64-NEXT: vor.vv v8, v10, v8
1987 ; RV64-NEXT: vsrl.vi v10, v8, 1
1988 ; RV64-NEXT: vand.vx v8, v8, a2
1989 ; RV64-NEXT: vand.vx v10, v10, a2
1990 ; RV64-NEXT: vadd.vv v8, v8, v8
1991 ; RV64-NEXT: vor.vv v8, v10, v8
1994 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i64_unmasked:
1995 ; CHECK-ZVBB: # %bb.0:
1996 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1997 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
1998 ; CHECK-ZVBB-NEXT: ret
1999 %v = call <vscale x 2 x i64> @llvm.vp.bitreverse.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
2000 ret <vscale x 2 x i64> %v
2003 declare <vscale x 4 x i64> @llvm.vp.bitreverse.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
2005 define <vscale x 4 x i64> @vp_bitreverse_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
2006 ; RV32-LABEL: vp_bitreverse_nxv4i64:
2008 ; RV32-NEXT: addi sp, sp, -16
2009 ; RV32-NEXT: .cfi_def_cfa_offset 16
2010 ; RV32-NEXT: lui a4, 1044480
2011 ; RV32-NEXT: li a3, 56
2012 ; RV32-NEXT: lui a5, 16
2013 ; RV32-NEXT: li a2, 40
2014 ; RV32-NEXT: lui a1, 4080
2015 ; RV32-NEXT: addi a6, sp, 8
2016 ; RV32-NEXT: sw a4, 8(sp)
2017 ; RV32-NEXT: sw zero, 12(sp)
2018 ; RV32-NEXT: vsetvli a4, zero, e64, m4, ta, ma
2019 ; RV32-NEXT: vlse64.v v12, (a6), zero
2020 ; RV32-NEXT: lui a4, 61681
2021 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
2022 ; RV32-NEXT: vsll.vx v16, v8, a3, v0.t
2023 ; RV32-NEXT: addi a5, a5, -256
2024 ; RV32-NEXT: vand.vx v20, v8, a5, v0.t
2025 ; RV32-NEXT: vsll.vx v20, v20, a2, v0.t
2026 ; RV32-NEXT: vor.vv v16, v16, v20, v0.t
2027 ; RV32-NEXT: vand.vx v20, v8, a1, v0.t
2028 ; RV32-NEXT: vsll.vi v20, v20, 24, v0.t
2029 ; RV32-NEXT: vand.vv v24, v8, v12, v0.t
2030 ; RV32-NEXT: vsll.vi v24, v24, 8, v0.t
2031 ; RV32-NEXT: vor.vv v20, v20, v24, v0.t
2032 ; RV32-NEXT: vor.vv v16, v16, v20, v0.t
2033 ; RV32-NEXT: vsrl.vx v20, v8, a3, v0.t
2034 ; RV32-NEXT: lui a3, 209715
2035 ; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t
2036 ; RV32-NEXT: lui a2, 349525
2037 ; RV32-NEXT: addi a4, a4, -241
2038 ; RV32-NEXT: addi a3, a3, 819
2039 ; RV32-NEXT: addi a2, a2, 1365
2040 ; RV32-NEXT: vand.vx v24, v24, a5, v0.t
2041 ; RV32-NEXT: vor.vv v20, v24, v20, v0.t
2042 ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
2043 ; RV32-NEXT: vand.vx v24, v24, a1, v0.t
2044 ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
2045 ; RV32-NEXT: vand.vv v8, v8, v12, v0.t
2046 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
2047 ; RV32-NEXT: vmv.v.x v28, a4
2048 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
2049 ; RV32-NEXT: vor.vv v8, v8, v24, v0.t
2050 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
2051 ; RV32-NEXT: vmv.v.x v12, a3
2052 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
2053 ; RV32-NEXT: vor.vv v20, v8, v20, v0.t
2054 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
2055 ; RV32-NEXT: vmv.v.x v8, a2
2056 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
2057 ; RV32-NEXT: vor.vv v16, v16, v20, v0.t
2058 ; RV32-NEXT: vsrl.vi v20, v16, 4, v0.t
2059 ; RV32-NEXT: vand.vv v20, v20, v28, v0.t
2060 ; RV32-NEXT: vand.vv v16, v16, v28, v0.t
2061 ; RV32-NEXT: vsll.vi v16, v16, 4, v0.t
2062 ; RV32-NEXT: vor.vv v16, v20, v16, v0.t
2063 ; RV32-NEXT: vsrl.vi v20, v16, 2, v0.t
2064 ; RV32-NEXT: vand.vv v20, v20, v12, v0.t
2065 ; RV32-NEXT: vand.vv v12, v16, v12, v0.t
2066 ; RV32-NEXT: vsll.vi v12, v12, 2, v0.t
2067 ; RV32-NEXT: vor.vv v12, v20, v12, v0.t
2068 ; RV32-NEXT: vsrl.vi v16, v12, 1, v0.t
2069 ; RV32-NEXT: vand.vv v16, v16, v8, v0.t
2070 ; RV32-NEXT: vand.vv v8, v12, v8, v0.t
2071 ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
2072 ; RV32-NEXT: vor.vv v8, v16, v8, v0.t
2073 ; RV32-NEXT: addi sp, sp, 16
2074 ; RV32-NEXT: .cfi_def_cfa_offset 0
2077 ; RV64-LABEL: vp_bitreverse_nxv4i64:
2079 ; RV64-NEXT: lui a1, 4080
2080 ; RV64-NEXT: li a3, 255
2081 ; RV64-NEXT: li a2, 56
2082 ; RV64-NEXT: lui a4, 16
2083 ; RV64-NEXT: lui a5, 61681
2084 ; RV64-NEXT: lui a6, 209715
2085 ; RV64-NEXT: lui a7, 349525
2086 ; RV64-NEXT: addiw a5, a5, -241
2087 ; RV64-NEXT: addiw a6, a6, 819
2088 ; RV64-NEXT: addiw a7, a7, 1365
2089 ; RV64-NEXT: slli t0, a5, 32
2090 ; RV64-NEXT: add t0, a5, t0
2091 ; RV64-NEXT: slli a5, a6, 32
2092 ; RV64-NEXT: add a6, a6, a5
2093 ; RV64-NEXT: slli a5, a7, 32
2094 ; RV64-NEXT: add a5, a7, a5
2095 ; RV64-NEXT: li a7, 40
2096 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
2097 ; RV64-NEXT: vand.vx v12, v8, a1, v0.t
2098 ; RV64-NEXT: slli a3, a3, 24
2099 ; RV64-NEXT: addiw a0, a4, -256
2100 ; RV64-NEXT: vsll.vi v12, v12, 24, v0.t
2101 ; RV64-NEXT: vand.vx v16, v8, a3, v0.t
2102 ; RV64-NEXT: vsll.vi v16, v16, 8, v0.t
2103 ; RV64-NEXT: vor.vv v12, v12, v16, v0.t
2104 ; RV64-NEXT: vsll.vx v16, v8, a2, v0.t
2105 ; RV64-NEXT: vand.vx v20, v8, a0, v0.t
2106 ; RV64-NEXT: vsll.vx v20, v20, a7, v0.t
2107 ; RV64-NEXT: vor.vv v16, v16, v20, v0.t
2108 ; RV64-NEXT: vor.vv v12, v16, v12, v0.t
2109 ; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t
2110 ; RV64-NEXT: vsrl.vx v20, v8, a7, v0.t
2111 ; RV64-NEXT: vand.vx v20, v20, a0, v0.t
2112 ; RV64-NEXT: vor.vv v16, v20, v16, v0.t
2113 ; RV64-NEXT: vsrl.vi v20, v8, 24, v0.t
2114 ; RV64-NEXT: vand.vx v20, v20, a1, v0.t
2115 ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
2116 ; RV64-NEXT: vand.vx v8, v8, a3, v0.t
2117 ; RV64-NEXT: vor.vv v8, v8, v20, v0.t
2118 ; RV64-NEXT: vor.vv v8, v8, v16, v0.t
2119 ; RV64-NEXT: vor.vv v8, v12, v8, v0.t
2120 ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
2121 ; RV64-NEXT: vand.vx v12, v12, t0, v0.t
2122 ; RV64-NEXT: vand.vx v8, v8, t0, v0.t
2123 ; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
2124 ; RV64-NEXT: vor.vv v8, v12, v8, v0.t
2125 ; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t
2126 ; RV64-NEXT: vand.vx v12, v12, a6, v0.t
2127 ; RV64-NEXT: vand.vx v8, v8, a6, v0.t
2128 ; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
2129 ; RV64-NEXT: vor.vv v8, v12, v8, v0.t
2130 ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
2131 ; RV64-NEXT: vand.vx v12, v12, a5, v0.t
2132 ; RV64-NEXT: vand.vx v8, v8, a5, v0.t
2133 ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
2134 ; RV64-NEXT: vor.vv v8, v12, v8, v0.t
2137 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i64:
2138 ; CHECK-ZVBB: # %bb.0:
2139 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma
2140 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
2141 ; CHECK-ZVBB-NEXT: ret
2142 %v = call <vscale x 4 x i64> @llvm.vp.bitreverse.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl)
2143 ret <vscale x 4 x i64> %v
2146 define <vscale x 4 x i64> @vp_bitreverse_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
2147 ; RV32-LABEL: vp_bitreverse_nxv4i64_unmasked:
2149 ; RV32-NEXT: addi sp, sp, -16
2150 ; RV32-NEXT: .cfi_def_cfa_offset 16
2151 ; RV32-NEXT: lui a1, 1044480
2152 ; RV32-NEXT: li a2, 56
2153 ; RV32-NEXT: lui a3, 16
2154 ; RV32-NEXT: li a4, 40
2155 ; RV32-NEXT: lui a5, 4080
2156 ; RV32-NEXT: addi a6, sp, 8
2157 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
2158 ; RV32-NEXT: vsrl.vi v20, v8, 24
2159 ; RV32-NEXT: sw a1, 8(sp)
2160 ; RV32-NEXT: sw zero, 12(sp)
2161 ; RV32-NEXT: vsll.vx v16, v8, a2
2162 ; RV32-NEXT: addi a1, a3, -256
2163 ; RV32-NEXT: vsrl.vx v12, v8, a2
2164 ; RV32-NEXT: vsrl.vx v24, v8, a4
2165 ; RV32-NEXT: vand.vx v28, v8, a1
2166 ; RV32-NEXT: vand.vx v24, v24, a1
2167 ; RV32-NEXT: vor.vv v12, v24, v12
2168 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
2169 ; RV32-NEXT: vlse64.v v24, (a6), zero
2170 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
2171 ; RV32-NEXT: vsll.vx v28, v28, a4
2172 ; RV32-NEXT: vor.vv v16, v16, v28
2173 ; RV32-NEXT: vsrl.vi v28, v8, 8
2174 ; RV32-NEXT: vand.vx v20, v20, a5
2175 ; RV32-NEXT: vand.vv v28, v28, v24
2176 ; RV32-NEXT: vor.vv v20, v28, v20
2177 ; RV32-NEXT: lui a1, 61681
2178 ; RV32-NEXT: lui a2, 209715
2179 ; RV32-NEXT: lui a3, 349525
2180 ; RV32-NEXT: vand.vv v24, v8, v24
2181 ; RV32-NEXT: vand.vx v8, v8, a5
2182 ; RV32-NEXT: addi a1, a1, -241
2183 ; RV32-NEXT: addi a2, a2, 819
2184 ; RV32-NEXT: addi a3, a3, 1365
2185 ; RV32-NEXT: vsll.vi v8, v8, 24
2186 ; RV32-NEXT: vsll.vi v24, v24, 8
2187 ; RV32-NEXT: vor.vv v8, v8, v24
2188 ; RV32-NEXT: vsetvli a4, zero, e32, m4, ta, ma
2189 ; RV32-NEXT: vmv.v.x v24, a1
2190 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
2191 ; RV32-NEXT: vor.vv v12, v20, v12
2192 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
2193 ; RV32-NEXT: vmv.v.x v20, a2
2194 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
2195 ; RV32-NEXT: vor.vv v8, v16, v8
2196 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
2197 ; RV32-NEXT: vmv.v.x v16, a3
2198 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
2199 ; RV32-NEXT: vor.vv v8, v8, v12
2200 ; RV32-NEXT: vsrl.vi v12, v8, 4
2201 ; RV32-NEXT: vand.vv v8, v8, v24
2202 ; RV32-NEXT: vand.vv v12, v12, v24
2203 ; RV32-NEXT: vsll.vi v8, v8, 4
2204 ; RV32-NEXT: vor.vv v8, v12, v8
2205 ; RV32-NEXT: vsrl.vi v12, v8, 2
2206 ; RV32-NEXT: vand.vv v8, v8, v20
2207 ; RV32-NEXT: vand.vv v12, v12, v20
2208 ; RV32-NEXT: vsll.vi v8, v8, 2
2209 ; RV32-NEXT: vor.vv v8, v12, v8
2210 ; RV32-NEXT: vsrl.vi v12, v8, 1
2211 ; RV32-NEXT: vand.vv v8, v8, v16
2212 ; RV32-NEXT: vand.vv v12, v12, v16
2213 ; RV32-NEXT: vadd.vv v8, v8, v8
2214 ; RV32-NEXT: vor.vv v8, v12, v8
2215 ; RV32-NEXT: addi sp, sp, 16
2216 ; RV32-NEXT: .cfi_def_cfa_offset 0
2219 ; RV64-LABEL: vp_bitreverse_nxv4i64_unmasked:
2221 ; RV64-NEXT: lui a1, 4080
2222 ; RV64-NEXT: li a2, 255
2223 ; RV64-NEXT: li a3, 56
2224 ; RV64-NEXT: lui a4, 16
2225 ; RV64-NEXT: li a5, 40
2226 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
2227 ; RV64-NEXT: vsrl.vi v16, v8, 24
2228 ; RV64-NEXT: vsrl.vi v20, v8, 8
2229 ; RV64-NEXT: addiw a0, a4, -256
2230 ; RV64-NEXT: vsrl.vx v12, v8, a3
2231 ; RV64-NEXT: vsrl.vx v24, v8, a5
2232 ; RV64-NEXT: vand.vx v24, v24, a0
2233 ; RV64-NEXT: vor.vv v12, v24, v12
2234 ; RV64-NEXT: vand.vx v24, v8, a1
2235 ; RV64-NEXT: slli a2, a2, 24
2236 ; RV64-NEXT: vand.vx v16, v16, a1
2237 ; RV64-NEXT: vsll.vi v24, v24, 24
2238 ; RV64-NEXT: vand.vx v20, v20, a2
2239 ; RV64-NEXT: vor.vv v16, v20, v16
2240 ; RV64-NEXT: vand.vx v20, v8, a2
2241 ; RV64-NEXT: vsll.vi v20, v20, 8
2242 ; RV64-NEXT: vor.vv v20, v24, v20
2243 ; RV64-NEXT: vsll.vx v24, v8, a3
2244 ; RV64-NEXT: vand.vx v8, v8, a0
2245 ; RV64-NEXT: vsll.vx v8, v8, a5
2246 ; RV64-NEXT: vor.vv v8, v24, v8
2247 ; RV64-NEXT: lui a0, 61681
2248 ; RV64-NEXT: lui a1, 209715
2249 ; RV64-NEXT: lui a2, 349525
2250 ; RV64-NEXT: addiw a0, a0, -241
2251 ; RV64-NEXT: addiw a1, a1, 819
2252 ; RV64-NEXT: addiw a2, a2, 1365
2253 ; RV64-NEXT: slli a3, a0, 32
2254 ; RV64-NEXT: slli a4, a1, 32
2255 ; RV64-NEXT: add a0, a0, a3
2256 ; RV64-NEXT: slli a3, a2, 32
2257 ; RV64-NEXT: add a1, a1, a4
2258 ; RV64-NEXT: add a2, a2, a3
2259 ; RV64-NEXT: vor.vv v12, v16, v12
2260 ; RV64-NEXT: vor.vv v8, v8, v20
2261 ; RV64-NEXT: vor.vv v8, v8, v12
2262 ; RV64-NEXT: vsrl.vi v12, v8, 4
2263 ; RV64-NEXT: vand.vx v8, v8, a0
2264 ; RV64-NEXT: vand.vx v12, v12, a0
2265 ; RV64-NEXT: vsll.vi v8, v8, 4
2266 ; RV64-NEXT: vor.vv v8, v12, v8
2267 ; RV64-NEXT: vsrl.vi v12, v8, 2
2268 ; RV64-NEXT: vand.vx v8, v8, a1
2269 ; RV64-NEXT: vand.vx v12, v12, a1
2270 ; RV64-NEXT: vsll.vi v8, v8, 2
2271 ; RV64-NEXT: vor.vv v8, v12, v8
2272 ; RV64-NEXT: vsrl.vi v12, v8, 1
2273 ; RV64-NEXT: vand.vx v8, v8, a2
2274 ; RV64-NEXT: vand.vx v12, v12, a2
2275 ; RV64-NEXT: vadd.vv v8, v8, v8
2276 ; RV64-NEXT: vor.vv v8, v12, v8
2279 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i64_unmasked:
2280 ; CHECK-ZVBB: # %bb.0:
2281 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma
2282 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
2283 ; CHECK-ZVBB-NEXT: ret
2284 %v = call <vscale x 4 x i64> @llvm.vp.bitreverse.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
2285 ret <vscale x 4 x i64> %v
2288 declare <vscale x 7 x i64> @llvm.vp.bitreverse.nxv7i64(<vscale x 7 x i64>, <vscale x 7 x i1>, i32)
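; At a full m8 register group (nxv7i64 and nxv8i64) the expansion runs out of
; vector registers, so intermediate results are spilled to a vlenb-scaled
; stack area (csrr vlenb / vs8r.v / vl8r.v) and the frame size is described
; with .cfi_escape expressions.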
2290 define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
2291 ; RV32-LABEL: vp_bitreverse_nxv7i64:
2293 ; RV32-NEXT: addi sp, sp, -16
2294 ; RV32-NEXT: .cfi_def_cfa_offset 16
2295 ; RV32-NEXT: csrr a1, vlenb
2296 ; RV32-NEXT: li a2, 24
2297 ; RV32-NEXT: mul a1, a1, a2
2298 ; RV32-NEXT: sub sp, sp, a1
2299 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
2300 ; RV32-NEXT: lui a1, 1044480
2301 ; RV32-NEXT: li a2, 56
2302 ; RV32-NEXT: lui a3, 16
2303 ; RV32-NEXT: li a4, 40
2304 ; RV32-NEXT: addi a5, sp, 8
2305 ; RV32-NEXT: sw a1, 8(sp)
2306 ; RV32-NEXT: sw zero, 12(sp)
2307 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2308 ; RV32-NEXT: vsll.vx v16, v8, a2, v0.t
2309 ; RV32-NEXT: addi a1, a3, -256
2310 ; RV32-NEXT: vand.vx v24, v8, a1, v0.t
2311 ; RV32-NEXT: vsll.vx v24, v24, a4, v0.t
2312 ; RV32-NEXT: vor.vv v16, v16, v24, v0.t
2313 ; RV32-NEXT: csrr a3, vlenb
2314 ; RV32-NEXT: slli a3, a3, 4
2315 ; RV32-NEXT: add a3, sp, a3
2316 ; RV32-NEXT: addi a3, a3, 16
2317 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
2318 ; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
2319 ; RV32-NEXT: vlse64.v v16, (a5), zero
2320 ; RV32-NEXT: csrr a3, vlenb
2321 ; RV32-NEXT: slli a3, a3, 3
2322 ; RV32-NEXT: add a3, sp, a3
2323 ; RV32-NEXT: addi a3, a3, 16
2324 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
2325 ; RV32-NEXT: lui a3, 4080
2326 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2327 ; RV32-NEXT: vand.vx v24, v8, a3, v0.t
2328 ; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
2329 ; RV32-NEXT: addi a5, sp, 16
2330 ; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
2331 ; RV32-NEXT: vand.vv v24, v8, v16, v0.t
2332 ; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
2333 ; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
2334 ; RV32-NEXT: vor.vv v16, v24, v16, v0.t
2335 ; RV32-NEXT: csrr a5, vlenb
2336 ; RV32-NEXT: slli a5, a5, 4
2337 ; RV32-NEXT: add a5, sp, a5
2338 ; RV32-NEXT: addi a5, a5, 16
2339 ; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
2340 ; RV32-NEXT: vor.vv v16, v24, v16, v0.t
2341 ; RV32-NEXT: csrr a5, vlenb
2342 ; RV32-NEXT: slli a5, a5, 4
2343 ; RV32-NEXT: add a5, sp, a5
2344 ; RV32-NEXT: addi a5, a5, 16
2345 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
2346 ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t
2347 ; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t
2348 ; RV32-NEXT: vand.vx v24, v24, a1, v0.t
2349 ; RV32-NEXT: vor.vv v16, v24, v16, v0.t
2350 ; RV32-NEXT: addi a1, sp, 16
2351 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
2352 ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
2353 ; RV32-NEXT: vand.vx v24, v24, a3, v0.t
2354 ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
2355 ; RV32-NEXT: csrr a1, vlenb
2356 ; RV32-NEXT: slli a1, a1, 3
2357 ; RV32-NEXT: add a1, sp, a1
2358 ; RV32-NEXT: addi a1, a1, 16
2359 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
2360 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
2361 ; RV32-NEXT: vor.vv v8, v8, v24, v0.t
2362 ; RV32-NEXT: addi a1, sp, 16
2363 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
2364 ; RV32-NEXT: vor.vv v8, v8, v16, v0.t
2365 ; RV32-NEXT: lui a1, 61681
2366 ; RV32-NEXT: lui a2, 209715
2367 ; RV32-NEXT: lui a3, 349525
2368 ; RV32-NEXT: addi a1, a1, -241
2369 ; RV32-NEXT: addi a2, a2, 819
2370 ; RV32-NEXT: addi a3, a3, 1365
2371 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2372 ; RV32-NEXT: vmv.v.x v24, a1
2373 ; RV32-NEXT: csrr a1, vlenb
2374 ; RV32-NEXT: slli a1, a1, 4
2375 ; RV32-NEXT: add a1, sp, a1
2376 ; RV32-NEXT: addi a1, a1, 16
2377 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
2378 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2379 ; RV32-NEXT: vor.vv v8, v16, v8, v0.t
2380 ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
2381 ; RV32-NEXT: vand.vv v16, v16, v24, v0.t
2382 ; RV32-NEXT: vand.vv v24, v8, v24, v0.t
2383 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
2384 ; RV32-NEXT: vmv.v.x v8, a2
2385 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2386 ; RV32-NEXT: vsll.vi v24, v24, 4, v0.t
2387 ; RV32-NEXT: vor.vv v24, v16, v24, v0.t
2388 ; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t
2389 ; RV32-NEXT: vand.vv v16, v16, v8, v0.t
2390 ; RV32-NEXT: vand.vv v24, v24, v8, v0.t
2391 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
2392 ; RV32-NEXT: vmv.v.x v8, a3
2393 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2394 ; RV32-NEXT: vsll.vi v24, v24, 2, v0.t
2395 ; RV32-NEXT: vor.vv v16, v16, v24, v0.t
2396 ; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
2397 ; RV32-NEXT: vand.vv v24, v24, v8, v0.t
2398 ; RV32-NEXT: vand.vv v8, v16, v8, v0.t
2399 ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
2400 ; RV32-NEXT: vor.vv v8, v24, v8, v0.t
2401 ; RV32-NEXT: csrr a0, vlenb
2402 ; RV32-NEXT: li a1, 24
2403 ; RV32-NEXT: mul a0, a0, a1
2404 ; RV32-NEXT: add sp, sp, a0
2405 ; RV32-NEXT: .cfi_def_cfa sp, 16
2406 ; RV32-NEXT: addi sp, sp, 16
2407 ; RV32-NEXT: .cfi_def_cfa_offset 0
2410 ; RV64-LABEL: vp_bitreverse_nxv7i64:
2412 ; RV64-NEXT: addi sp, sp, -16
2413 ; RV64-NEXT: .cfi_def_cfa_offset 16
2414 ; RV64-NEXT: csrr a1, vlenb
2415 ; RV64-NEXT: slli a1, a1, 3
2416 ; RV64-NEXT: sub sp, sp, a1
2417 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
2418 ; RV64-NEXT: lui a1, 4080
2419 ; RV64-NEXT: li a2, 255
2420 ; RV64-NEXT: li a3, 56
2421 ; RV64-NEXT: lui a4, 16
2422 ; RV64-NEXT: li a5, 40
2423 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2424 ; RV64-NEXT: vand.vx v16, v8, a1, v0.t
2425 ; RV64-NEXT: slli a2, a2, 24
2426 ; RV64-NEXT: addiw a0, a4, -256
2427 ; RV64-NEXT: vsll.vi v16, v16, 24, v0.t
2428 ; RV64-NEXT: vand.vx v24, v8, a2, v0.t
2429 ; RV64-NEXT: vsll.vi v24, v24, 8, v0.t
2430 ; RV64-NEXT: vor.vv v16, v16, v24, v0.t
2431 ; RV64-NEXT: addi a4, sp, 16
2432 ; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
2433 ; RV64-NEXT: vsll.vx v24, v8, a3, v0.t
2434 ; RV64-NEXT: vand.vx v16, v8, a0, v0.t
2435 ; RV64-NEXT: vsll.vx v16, v16, a5, v0.t
2436 ; RV64-NEXT: vor.vv v16, v24, v16, v0.t
2437 ; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
2438 ; RV64-NEXT: vor.vv v16, v16, v24, v0.t
2439 ; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
2440 ; RV64-NEXT: vsrl.vx v24, v8, a3, v0.t
2441 ; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t
2442 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t
2443 ; RV64-NEXT: vor.vv v24, v16, v24, v0.t
2444 ; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t
2445 ; RV64-NEXT: vand.vx v16, v16, a1, v0.t
2446 ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
2447 ; RV64-NEXT: vand.vx v8, v8, a2, v0.t
2448 ; RV64-NEXT: vor.vv v8, v8, v16, v0.t
2449 ; RV64-NEXT: vor.vv v8, v8, v24, v0.t
2450 ; RV64-NEXT: lui a0, 61681
2451 ; RV64-NEXT: lui a1, 209715
2452 ; RV64-NEXT: lui a2, 349525
2453 ; RV64-NEXT: addiw a0, a0, -241
2454 ; RV64-NEXT: addiw a1, a1, 819
2455 ; RV64-NEXT: addiw a2, a2, 1365
2456 ; RV64-NEXT: slli a3, a0, 32
2457 ; RV64-NEXT: slli a4, a1, 32
2458 ; RV64-NEXT: add a0, a0, a3
2459 ; RV64-NEXT: slli a3, a2, 32
2460 ; RV64-NEXT: add a1, a1, a4
2461 ; RV64-NEXT: add a2, a2, a3
2462 ; RV64-NEXT: addi a3, sp, 16
2463 ; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
2464 ; RV64-NEXT: vor.vv v8, v16, v8, v0.t
2465 ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
2466 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t
2467 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t
2468 ; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
2469 ; RV64-NEXT: vor.vv v8, v16, v8, v0.t
2470 ; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
2471 ; RV64-NEXT: vand.vx v16, v16, a1, v0.t
2472 ; RV64-NEXT: vand.vx v8, v8, a1, v0.t
2473 ; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
2474 ; RV64-NEXT: vor.vv v8, v16, v8, v0.t
2475 ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
2476 ; RV64-NEXT: vand.vx v16, v16, a2, v0.t
2477 ; RV64-NEXT: vand.vx v8, v8, a2, v0.t
2478 ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
2479 ; RV64-NEXT: vor.vv v8, v16, v8, v0.t
2480 ; RV64-NEXT: csrr a0, vlenb
2481 ; RV64-NEXT: slli a0, a0, 3
2482 ; RV64-NEXT: add sp, sp, a0
2483 ; RV64-NEXT: .cfi_def_cfa sp, 16
2484 ; RV64-NEXT: addi sp, sp, 16
2485 ; RV64-NEXT: .cfi_def_cfa_offset 0
2488 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv7i64:
2489 ; CHECK-ZVBB: # %bb.0:
2490 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2491 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
2492 ; CHECK-ZVBB-NEXT: ret
2493 %v = call <vscale x 7 x i64> @llvm.vp.bitreverse.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl)
2494 ret <vscale x 7 x i64> %v
2497 define <vscale x 7 x i64> @vp_bitreverse_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
2498 ; RV32-LABEL: vp_bitreverse_nxv7i64_unmasked:
2500 ; RV32-NEXT: addi sp, sp, -16
2501 ; RV32-NEXT: .cfi_def_cfa_offset 16
2502 ; RV32-NEXT: csrr a1, vlenb
2503 ; RV32-NEXT: slli a1, a1, 4
2504 ; RV32-NEXT: sub sp, sp, a1
2505 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
2506 ; RV32-NEXT: lui a1, 1044480
2507 ; RV32-NEXT: li a2, 56
2508 ; RV32-NEXT: lui a3, 16
2509 ; RV32-NEXT: li a4, 40
2510 ; RV32-NEXT: lui a5, 4080
2511 ; RV32-NEXT: addi a6, sp, 8
2512 ; RV32-NEXT: sw a1, 8(sp)
2513 ; RV32-NEXT: sw zero, 12(sp)
2514 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2515 ; RV32-NEXT: vsll.vx v16, v8, a2
2516 ; RV32-NEXT: addi a1, a3, -256
2517 ; RV32-NEXT: vsrl.vx v24, v8, a2
2518 ; RV32-NEXT: vsrl.vx v0, v8, a4
2519 ; RV32-NEXT: vand.vx v0, v0, a1
2520 ; RV32-NEXT: vor.vv v24, v0, v24
2521 ; RV32-NEXT: addi a2, sp, 16
2522 ; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
2523 ; RV32-NEXT: vand.vx v24, v8, a1
2524 ; RV32-NEXT: vsll.vx v24, v24, a4
2525 ; RV32-NEXT: vor.vv v16, v16, v24
2526 ; RV32-NEXT: csrr a1, vlenb
2527 ; RV32-NEXT: slli a1, a1, 3
2528 ; RV32-NEXT: add a1, sp, a1
2529 ; RV32-NEXT: addi a1, a1, 16
2530 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
2531 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
2532 ; RV32-NEXT: vlse64.v v24, (a6), zero
2533 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2534 ; RV32-NEXT: vsrl.vi v16, v8, 24
2535 ; RV32-NEXT: vand.vx v16, v16, a5
2536 ; RV32-NEXT: vsrl.vi v0, v8, 8
2537 ; RV32-NEXT: vand.vv v0, v0, v24
2538 ; RV32-NEXT: vor.vv v16, v0, v16
2539 ; RV32-NEXT: vand.vv v24, v8, v24
2540 ; RV32-NEXT: vand.vx v8, v8, a5
2541 ; RV32-NEXT: vsll.vi v8, v8, 24
2542 ; RV32-NEXT: vsll.vi v24, v24, 8
2543 ; RV32-NEXT: vor.vv v24, v8, v24
2544 ; RV32-NEXT: addi a1, sp, 16
2545 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
2546 ; RV32-NEXT: vor.vv v8, v16, v8
2547 ; RV32-NEXT: lui a1, 61681
2548 ; RV32-NEXT: lui a2, 209715
2549 ; RV32-NEXT: lui a3, 349525
2550 ; RV32-NEXT: addi a1, a1, -241
2551 ; RV32-NEXT: addi a2, a2, 819
2552 ; RV32-NEXT: addi a3, a3, 1365
2553 ; RV32-NEXT: csrr a4, vlenb
2554 ; RV32-NEXT: slli a4, a4, 3
2555 ; RV32-NEXT: add a4, sp, a4
2556 ; RV32-NEXT: addi a4, a4, 16
2557 ; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
2558 ; RV32-NEXT: vor.vv v16, v16, v24
2559 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2560 ; RV32-NEXT: vmv.v.x v24, a1
2561 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2562 ; RV32-NEXT: vor.vv v8, v16, v8
2563 ; RV32-NEXT: vsrl.vi v16, v8, 4
2564 ; RV32-NEXT: vand.vv v8, v8, v24
2565 ; RV32-NEXT: vand.vv v16, v16, v24
2566 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
2567 ; RV32-NEXT: vmv.v.x v24, a2
2568 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2569 ; RV32-NEXT: vsll.vi v8, v8, 4
2570 ; RV32-NEXT: vor.vv v8, v16, v8
2571 ; RV32-NEXT: vsrl.vi v16, v8, 2
2572 ; RV32-NEXT: vand.vv v8, v8, v24
2573 ; RV32-NEXT: vand.vv v16, v16, v24
2574 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
2575 ; RV32-NEXT: vmv.v.x v24, a3
2576 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2577 ; RV32-NEXT: vsll.vi v8, v8, 2
2578 ; RV32-NEXT: vor.vv v8, v16, v8
2579 ; RV32-NEXT: vsrl.vi v16, v8, 1
2580 ; RV32-NEXT: vand.vv v8, v8, v24
2581 ; RV32-NEXT: vand.vv v16, v16, v24
2582 ; RV32-NEXT: vadd.vv v8, v8, v8
2583 ; RV32-NEXT: vor.vv v8, v16, v8
2584 ; RV32-NEXT: csrr a0, vlenb
2585 ; RV32-NEXT: slli a0, a0, 4
2586 ; RV32-NEXT: add sp, sp, a0
2587 ; RV32-NEXT: .cfi_def_cfa sp, 16
2588 ; RV32-NEXT: addi sp, sp, 16
2589 ; RV32-NEXT: .cfi_def_cfa_offset 0
2592 ; RV64-LABEL: vp_bitreverse_nxv7i64_unmasked:
2594 ; RV64-NEXT: addi sp, sp, -16
2595 ; RV64-NEXT: .cfi_def_cfa_offset 16
2596 ; RV64-NEXT: csrr a1, vlenb
2597 ; RV64-NEXT: slli a1, a1, 3
2598 ; RV64-NEXT: sub sp, sp, a1
2599 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
2600 ; RV64-NEXT: lui a1, 4080
2601 ; RV64-NEXT: li a2, 255
2602 ; RV64-NEXT: li a3, 56
2603 ; RV64-NEXT: lui a4, 16
2604 ; RV64-NEXT: li a5, 40
2605 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2606 ; RV64-NEXT: vsrl.vi v24, v8, 24
2607 ; RV64-NEXT: addiw a0, a4, -256
2608 ; RV64-NEXT: vsrl.vx v16, v8, a3
2609 ; RV64-NEXT: vsrl.vx v0, v8, a5
2610 ; RV64-NEXT: vand.vx v0, v0, a0
2611 ; RV64-NEXT: vor.vv v16, v0, v16
2612 ; RV64-NEXT: addi a4, sp, 16
2613 ; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
2614 ; RV64-NEXT: vsrl.vi v0, v8, 8
2615 ; RV64-NEXT: slli a2, a2, 24
2616 ; RV64-NEXT: vand.vx v24, v24, a1
2617 ; RV64-NEXT: vand.vx v0, v0, a2
2618 ; RV64-NEXT: vor.vv v24, v0, v24
2619 ; RV64-NEXT: vand.vx v0, v8, a1
2620 ; RV64-NEXT: vsll.vi v0, v0, 24
2621 ; RV64-NEXT: vand.vx v16, v8, a2
2622 ; RV64-NEXT: vsll.vi v16, v16, 8
2623 ; RV64-NEXT: vor.vv v0, v0, v16
2624 ; RV64-NEXT: vsll.vx v16, v8, a3
2625 ; RV64-NEXT: vand.vx v8, v8, a0
2626 ; RV64-NEXT: vsll.vx v8, v8, a5
2627 ; RV64-NEXT: vor.vv v8, v16, v8
2628 ; RV64-NEXT: lui a0, 61681
2629 ; RV64-NEXT: lui a1, 209715
2630 ; RV64-NEXT: lui a2, 349525
2631 ; RV64-NEXT: addiw a0, a0, -241
2632 ; RV64-NEXT: addiw a1, a1, 819
2633 ; RV64-NEXT: addiw a2, a2, 1365
2634 ; RV64-NEXT: slli a3, a0, 32
2635 ; RV64-NEXT: slli a4, a1, 32
2636 ; RV64-NEXT: add a0, a0, a3
2637 ; RV64-NEXT: slli a3, a2, 32
2638 ; RV64-NEXT: add a1, a1, a4
2639 ; RV64-NEXT: add a2, a2, a3
2640 ; RV64-NEXT: addi a3, sp, 16
2641 ; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
2642 ; RV64-NEXT: vor.vv v16, v24, v16
2643 ; RV64-NEXT: vor.vv v8, v8, v0
2644 ; RV64-NEXT: vor.vv v8, v8, v16
2645 ; RV64-NEXT: vsrl.vi v16, v8, 4
2646 ; RV64-NEXT: vand.vx v8, v8, a0
2647 ; RV64-NEXT: vand.vx v16, v16, a0
2648 ; RV64-NEXT: vsll.vi v8, v8, 4
2649 ; RV64-NEXT: vor.vv v8, v16, v8
2650 ; RV64-NEXT: vsrl.vi v16, v8, 2
2651 ; RV64-NEXT: vand.vx v8, v8, a1
2652 ; RV64-NEXT: vand.vx v16, v16, a1
2653 ; RV64-NEXT: vsll.vi v8, v8, 2
2654 ; RV64-NEXT: vor.vv v8, v16, v8
2655 ; RV64-NEXT: vsrl.vi v16, v8, 1
2656 ; RV64-NEXT: vand.vx v8, v8, a2
2657 ; RV64-NEXT: vand.vx v16, v16, a2
2658 ; RV64-NEXT: vadd.vv v8, v8, v8
2659 ; RV64-NEXT: vor.vv v8, v16, v8
2660 ; RV64-NEXT: csrr a0, vlenb
2661 ; RV64-NEXT: slli a0, a0, 3
2662 ; RV64-NEXT: add sp, sp, a0
2663 ; RV64-NEXT: .cfi_def_cfa sp, 16
2664 ; RV64-NEXT: addi sp, sp, 16
2665 ; RV64-NEXT: .cfi_def_cfa_offset 0
2668 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv7i64_unmasked:
2669 ; CHECK-ZVBB: # %bb.0:
2670 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2671 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
2672 ; CHECK-ZVBB-NEXT: ret
2673 %v = call <vscale x 7 x i64> @llvm.vp.bitreverse.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl)
2674 ret <vscale x 7 x i64> %v
2677 declare <vscale x 8 x i64> @llvm.vp.bitreverse.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
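; nxv8i64 is lowered the same way as nxv7i64 above, since both occupy a full
; m8 register group.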
2679 define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
2680 ; RV32-LABEL: vp_bitreverse_nxv8i64:
2682 ; RV32-NEXT: addi sp, sp, -16
2683 ; RV32-NEXT: .cfi_def_cfa_offset 16
2684 ; RV32-NEXT: csrr a1, vlenb
2685 ; RV32-NEXT: li a2, 24
2686 ; RV32-NEXT: mul a1, a1, a2
2687 ; RV32-NEXT: sub sp, sp, a1
2688 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
2689 ; RV32-NEXT: lui a1, 1044480
2690 ; RV32-NEXT: li a2, 56
2691 ; RV32-NEXT: lui a3, 16
2692 ; RV32-NEXT: li a4, 40
2693 ; RV32-NEXT: addi a5, sp, 8
2694 ; RV32-NEXT: sw a1, 8(sp)
2695 ; RV32-NEXT: sw zero, 12(sp)
2696 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2697 ; RV32-NEXT: vsll.vx v16, v8, a2, v0.t
2698 ; RV32-NEXT: addi a1, a3, -256
2699 ; RV32-NEXT: vand.vx v24, v8, a1, v0.t
2700 ; RV32-NEXT: vsll.vx v24, v24, a4, v0.t
2701 ; RV32-NEXT: vor.vv v16, v16, v24, v0.t
2702 ; RV32-NEXT: csrr a3, vlenb
2703 ; RV32-NEXT: slli a3, a3, 4
2704 ; RV32-NEXT: add a3, sp, a3
2705 ; RV32-NEXT: addi a3, a3, 16
2706 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
2707 ; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
2708 ; RV32-NEXT: vlse64.v v16, (a5), zero
2709 ; RV32-NEXT: csrr a3, vlenb
2710 ; RV32-NEXT: slli a3, a3, 3
2711 ; RV32-NEXT: add a3, sp, a3
2712 ; RV32-NEXT: addi a3, a3, 16
2713 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
2714 ; RV32-NEXT: lui a3, 4080
2715 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2716 ; RV32-NEXT: vand.vx v24, v8, a3, v0.t
2717 ; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
2718 ; RV32-NEXT: addi a5, sp, 16
2719 ; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
2720 ; RV32-NEXT: vand.vv v24, v8, v16, v0.t
2721 ; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
2722 ; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
2723 ; RV32-NEXT: vor.vv v16, v24, v16, v0.t
2724 ; RV32-NEXT: csrr a5, vlenb
2725 ; RV32-NEXT: slli a5, a5, 4
2726 ; RV32-NEXT: add a5, sp, a5
2727 ; RV32-NEXT: addi a5, a5, 16
2728 ; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
2729 ; RV32-NEXT: vor.vv v16, v24, v16, v0.t
2730 ; RV32-NEXT: csrr a5, vlenb
2731 ; RV32-NEXT: slli a5, a5, 4
2732 ; RV32-NEXT: add a5, sp, a5
2733 ; RV32-NEXT: addi a5, a5, 16
2734 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
2735 ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t
2736 ; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t
2737 ; RV32-NEXT: vand.vx v24, v24, a1, v0.t
2738 ; RV32-NEXT: vor.vv v16, v24, v16, v0.t
2739 ; RV32-NEXT: addi a1, sp, 16
2740 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
2741 ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
2742 ; RV32-NEXT: vand.vx v24, v24, a3, v0.t
2743 ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
2744 ; RV32-NEXT: csrr a1, vlenb
2745 ; RV32-NEXT: slli a1, a1, 3
2746 ; RV32-NEXT: add a1, sp, a1
2747 ; RV32-NEXT: addi a1, a1, 16
2748 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
2749 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
2750 ; RV32-NEXT: vor.vv v8, v8, v24, v0.t
2751 ; RV32-NEXT: addi a1, sp, 16
2752 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
2753 ; RV32-NEXT: vor.vv v8, v8, v16, v0.t
2754 ; RV32-NEXT: lui a1, 61681
2755 ; RV32-NEXT: lui a2, 209715
2756 ; RV32-NEXT: lui a3, 349525
2757 ; RV32-NEXT: addi a1, a1, -241
2758 ; RV32-NEXT: addi a2, a2, 819
2759 ; RV32-NEXT: addi a3, a3, 1365
2760 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2761 ; RV32-NEXT: vmv.v.x v24, a1
2762 ; RV32-NEXT: csrr a1, vlenb
2763 ; RV32-NEXT: slli a1, a1, 4
2764 ; RV32-NEXT: add a1, sp, a1
2765 ; RV32-NEXT: addi a1, a1, 16
2766 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
2767 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2768 ; RV32-NEXT: vor.vv v8, v16, v8, v0.t
2769 ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
2770 ; RV32-NEXT: vand.vv v16, v16, v24, v0.t
2771 ; RV32-NEXT: vand.vv v24, v8, v24, v0.t
2772 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
2773 ; RV32-NEXT: vmv.v.x v8, a2
2774 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2775 ; RV32-NEXT: vsll.vi v24, v24, 4, v0.t
2776 ; RV32-NEXT: vor.vv v24, v16, v24, v0.t
2777 ; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t
2778 ; RV32-NEXT: vand.vv v16, v16, v8, v0.t
2779 ; RV32-NEXT: vand.vv v24, v24, v8, v0.t
2780 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
2781 ; RV32-NEXT: vmv.v.x v8, a3
2782 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2783 ; RV32-NEXT: vsll.vi v24, v24, 2, v0.t
2784 ; RV32-NEXT: vor.vv v16, v16, v24, v0.t
2785 ; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
2786 ; RV32-NEXT: vand.vv v24, v24, v8, v0.t
2787 ; RV32-NEXT: vand.vv v8, v16, v8, v0.t
2788 ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
2789 ; RV32-NEXT: vor.vv v8, v24, v8, v0.t
2790 ; RV32-NEXT: csrr a0, vlenb
2791 ; RV32-NEXT: li a1, 24
2792 ; RV32-NEXT: mul a0, a0, a1
2793 ; RV32-NEXT: add sp, sp, a0
2794 ; RV32-NEXT: .cfi_def_cfa sp, 16
2795 ; RV32-NEXT: addi sp, sp, 16
2796 ; RV32-NEXT: .cfi_def_cfa_offset 0
2799 ; RV64-LABEL: vp_bitreverse_nxv8i64:
2801 ; RV64-NEXT: addi sp, sp, -16
2802 ; RV64-NEXT: .cfi_def_cfa_offset 16
2803 ; RV64-NEXT: csrr a1, vlenb
2804 ; RV64-NEXT: slli a1, a1, 3
2805 ; RV64-NEXT: sub sp, sp, a1
2806 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
2807 ; RV64-NEXT: lui a1, 4080
2808 ; RV64-NEXT: li a2, 255
2809 ; RV64-NEXT: li a3, 56
2810 ; RV64-NEXT: lui a4, 16
2811 ; RV64-NEXT: li a5, 40
2812 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2813 ; RV64-NEXT: vand.vx v16, v8, a1, v0.t
2814 ; RV64-NEXT: slli a2, a2, 24
2815 ; RV64-NEXT: addiw a0, a4, -256
2816 ; RV64-NEXT: vsll.vi v16, v16, 24, v0.t
2817 ; RV64-NEXT: vand.vx v24, v8, a2, v0.t
2818 ; RV64-NEXT: vsll.vi v24, v24, 8, v0.t
2819 ; RV64-NEXT: vor.vv v16, v16, v24, v0.t
2820 ; RV64-NEXT: addi a4, sp, 16
2821 ; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
2822 ; RV64-NEXT: vsll.vx v24, v8, a3, v0.t
2823 ; RV64-NEXT: vand.vx v16, v8, a0, v0.t
2824 ; RV64-NEXT: vsll.vx v16, v16, a5, v0.t
2825 ; RV64-NEXT: vor.vv v16, v24, v16, v0.t
2826 ; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
2827 ; RV64-NEXT: vor.vv v16, v16, v24, v0.t
2828 ; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
2829 ; RV64-NEXT: vsrl.vx v24, v8, a3, v0.t
2830 ; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t
2831 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t
2832 ; RV64-NEXT: vor.vv v24, v16, v24, v0.t
2833 ; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t
2834 ; RV64-NEXT: vand.vx v16, v16, a1, v0.t
2835 ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
2836 ; RV64-NEXT: vand.vx v8, v8, a2, v0.t
2837 ; RV64-NEXT: vor.vv v8, v8, v16, v0.t
2838 ; RV64-NEXT: vor.vv v8, v8, v24, v0.t
2839 ; RV64-NEXT: lui a0, 61681
2840 ; RV64-NEXT: lui a1, 209715
2841 ; RV64-NEXT: lui a2, 349525
2842 ; RV64-NEXT: addiw a0, a0, -241
2843 ; RV64-NEXT: addiw a1, a1, 819
2844 ; RV64-NEXT: addiw a2, a2, 1365
2845 ; RV64-NEXT: slli a3, a0, 32
2846 ; RV64-NEXT: slli a4, a1, 32
2847 ; RV64-NEXT: add a0, a0, a3
2848 ; RV64-NEXT: slli a3, a2, 32
2849 ; RV64-NEXT: add a1, a1, a4
2850 ; RV64-NEXT: add a2, a2, a3
2851 ; RV64-NEXT: addi a3, sp, 16
2852 ; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
2853 ; RV64-NEXT: vor.vv v8, v16, v8, v0.t
2854 ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
2855 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t
2856 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t
2857 ; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
2858 ; RV64-NEXT: vor.vv v8, v16, v8, v0.t
2859 ; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
2860 ; RV64-NEXT: vand.vx v16, v16, a1, v0.t
2861 ; RV64-NEXT: vand.vx v8, v8, a1, v0.t
2862 ; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
2863 ; RV64-NEXT: vor.vv v8, v16, v8, v0.t
2864 ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
2865 ; RV64-NEXT: vand.vx v16, v16, a2, v0.t
2866 ; RV64-NEXT: vand.vx v8, v8, a2, v0.t
2867 ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
2868 ; RV64-NEXT: vor.vv v8, v16, v8, v0.t
2869 ; RV64-NEXT: csrr a0, vlenb
2870 ; RV64-NEXT: slli a0, a0, 3
2871 ; RV64-NEXT: add sp, sp, a0
2872 ; RV64-NEXT: .cfi_def_cfa sp, 16
2873 ; RV64-NEXT: addi sp, sp, 16
2874 ; RV64-NEXT: .cfi_def_cfa_offset 0
2877 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i64:
2878 ; CHECK-ZVBB: # %bb.0:
2879 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2880 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
2881 ; CHECK-ZVBB-NEXT: ret
2882 %v = call <vscale x 8 x i64> @llvm.vp.bitreverse.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
2883 ret <vscale x 8 x i64> %v
2886 define <vscale x 8 x i64> @vp_bitreverse_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
2887 ; RV32-LABEL: vp_bitreverse_nxv8i64_unmasked:
2889 ; RV32-NEXT: addi sp, sp, -16
2890 ; RV32-NEXT: .cfi_def_cfa_offset 16
2891 ; RV32-NEXT: csrr a1, vlenb
2892 ; RV32-NEXT: slli a1, a1, 4
2893 ; RV32-NEXT: sub sp, sp, a1
2894 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
2895 ; RV32-NEXT: lui a1, 1044480
2896 ; RV32-NEXT: li a2, 56
2897 ; RV32-NEXT: lui a3, 16
2898 ; RV32-NEXT: li a4, 40
2899 ; RV32-NEXT: lui a5, 4080
2900 ; RV32-NEXT: addi a6, sp, 8
2901 ; RV32-NEXT: sw a1, 8(sp)
2902 ; RV32-NEXT: sw zero, 12(sp)
2903 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2904 ; RV32-NEXT: vsll.vx v16, v8, a2
2905 ; RV32-NEXT: addi a1, a3, -256
2906 ; RV32-NEXT: vsrl.vx v24, v8, a2
2907 ; RV32-NEXT: vsrl.vx v0, v8, a4
2908 ; RV32-NEXT: vand.vx v0, v0, a1
2909 ; RV32-NEXT: vor.vv v24, v0, v24
2910 ; RV32-NEXT: addi a2, sp, 16
2911 ; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
2912 ; RV32-NEXT: vand.vx v24, v8, a1
2913 ; RV32-NEXT: vsll.vx v24, v24, a4
2914 ; RV32-NEXT: vor.vv v16, v16, v24
2915 ; RV32-NEXT: csrr a1, vlenb
2916 ; RV32-NEXT: slli a1, a1, 3
2917 ; RV32-NEXT: add a1, sp, a1
2918 ; RV32-NEXT: addi a1, a1, 16
2919 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
2920 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
2921 ; RV32-NEXT: vlse64.v v24, (a6), zero
2922 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2923 ; RV32-NEXT: vsrl.vi v16, v8, 24
2924 ; RV32-NEXT: vand.vx v16, v16, a5
2925 ; RV32-NEXT: vsrl.vi v0, v8, 8
2926 ; RV32-NEXT: vand.vv v0, v0, v24
2927 ; RV32-NEXT: vor.vv v16, v0, v16
2928 ; RV32-NEXT: vand.vv v24, v8, v24
2929 ; RV32-NEXT: vand.vx v8, v8, a5
2930 ; RV32-NEXT: vsll.vi v8, v8, 24
2931 ; RV32-NEXT: vsll.vi v24, v24, 8
2932 ; RV32-NEXT: vor.vv v24, v8, v24
2933 ; RV32-NEXT: addi a1, sp, 16
2934 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
2935 ; RV32-NEXT: vor.vv v8, v16, v8
2936 ; RV32-NEXT: lui a1, 61681
2937 ; RV32-NEXT: lui a2, 209715
2938 ; RV32-NEXT: lui a3, 349525
2939 ; RV32-NEXT: addi a1, a1, -241
2940 ; RV32-NEXT: addi a2, a2, 819
2941 ; RV32-NEXT: addi a3, a3, 1365
2942 ; RV32-NEXT: csrr a4, vlenb
2943 ; RV32-NEXT: slli a4, a4, 3
2944 ; RV32-NEXT: add a4, sp, a4
2945 ; RV32-NEXT: addi a4, a4, 16
2946 ; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
2947 ; RV32-NEXT: vor.vv v16, v16, v24
2948 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2949 ; RV32-NEXT: vmv.v.x v24, a1
2950 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2951 ; RV32-NEXT: vor.vv v8, v16, v8
2952 ; RV32-NEXT: vsrl.vi v16, v8, 4
2953 ; RV32-NEXT: vand.vv v8, v8, v24
2954 ; RV32-NEXT: vand.vv v16, v16, v24
2955 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
2956 ; RV32-NEXT: vmv.v.x v24, a2
2957 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2958 ; RV32-NEXT: vsll.vi v8, v8, 4
2959 ; RV32-NEXT: vor.vv v8, v16, v8
2960 ; RV32-NEXT: vsrl.vi v16, v8, 2
2961 ; RV32-NEXT: vand.vv v8, v8, v24
2962 ; RV32-NEXT: vand.vv v16, v16, v24
2963 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
2964 ; RV32-NEXT: vmv.v.x v24, a3
2965 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2966 ; RV32-NEXT: vsll.vi v8, v8, 2
2967 ; RV32-NEXT: vor.vv v8, v16, v8
2968 ; RV32-NEXT: vsrl.vi v16, v8, 1
2969 ; RV32-NEXT: vand.vv v8, v8, v24
2970 ; RV32-NEXT: vand.vv v16, v16, v24
2971 ; RV32-NEXT: vadd.vv v8, v8, v8
2972 ; RV32-NEXT: vor.vv v8, v16, v8
2973 ; RV32-NEXT: csrr a0, vlenb
2974 ; RV32-NEXT: slli a0, a0, 4
2975 ; RV32-NEXT: add sp, sp, a0
2976 ; RV32-NEXT: .cfi_def_cfa sp, 16
2977 ; RV32-NEXT: addi sp, sp, 16
2978 ; RV32-NEXT: .cfi_def_cfa_offset 0
2979 ; RV32-NEXT: ret
2980 ;
2981 ; RV64-LABEL: vp_bitreverse_nxv8i64_unmasked:
2982 ; RV64: # %bb.0:
2983 ; RV64-NEXT: addi sp, sp, -16
2984 ; RV64-NEXT: .cfi_def_cfa_offset 16
2985 ; RV64-NEXT: csrr a1, vlenb
2986 ; RV64-NEXT: slli a1, a1, 3
2987 ; RV64-NEXT: sub sp, sp, a1
2988 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
2989 ; RV64-NEXT: lui a1, 4080
2990 ; RV64-NEXT: li a2, 255
2991 ; RV64-NEXT: li a3, 56
2992 ; RV64-NEXT: lui a4, 16
2993 ; RV64-NEXT: li a5, 40
2994 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2995 ; RV64-NEXT: vsrl.vi v24, v8, 24
2996 ; RV64-NEXT: addiw a0, a4, -256
2997 ; RV64-NEXT: vsrl.vx v16, v8, a3
2998 ; RV64-NEXT: vsrl.vx v0, v8, a5
2999 ; RV64-NEXT: vand.vx v0, v0, a0
3000 ; RV64-NEXT: vor.vv v16, v0, v16
3001 ; RV64-NEXT: addi a4, sp, 16
3002 ; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
3003 ; RV64-NEXT: vsrl.vi v0, v8, 8
3004 ; RV64-NEXT: slli a2, a2, 24
3005 ; RV64-NEXT: vand.vx v24, v24, a1
3006 ; RV64-NEXT: vand.vx v0, v0, a2
3007 ; RV64-NEXT: vor.vv v24, v0, v24
3008 ; RV64-NEXT: vand.vx v0, v8, a1
3009 ; RV64-NEXT: vsll.vi v0, v0, 24
3010 ; RV64-NEXT: vand.vx v16, v8, a2
3011 ; RV64-NEXT: vsll.vi v16, v16, 8
3012 ; RV64-NEXT: vor.vv v0, v0, v16
3013 ; RV64-NEXT: vsll.vx v16, v8, a3
3014 ; RV64-NEXT: vand.vx v8, v8, a0
3015 ; RV64-NEXT: vsll.vx v8, v8, a5
3016 ; RV64-NEXT: vor.vv v8, v16, v8
3017 ; RV64-NEXT: lui a0, 61681
3018 ; RV64-NEXT: lui a1, 209715
3019 ; RV64-NEXT: lui a2, 349525
3020 ; RV64-NEXT: addiw a0, a0, -241
3021 ; RV64-NEXT: addiw a1, a1, 819
3022 ; RV64-NEXT: addiw a2, a2, 1365
3023 ; RV64-NEXT: slli a3, a0, 32
3024 ; RV64-NEXT: slli a4, a1, 32
3025 ; RV64-NEXT: add a0, a0, a3
3026 ; RV64-NEXT: slli a3, a2, 32
3027 ; RV64-NEXT: add a1, a1, a4
3028 ; RV64-NEXT: add a2, a2, a3
3029 ; RV64-NEXT: addi a3, sp, 16
3030 ; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
3031 ; RV64-NEXT: vor.vv v16, v24, v16
3032 ; RV64-NEXT: vor.vv v8, v8, v0
3033 ; RV64-NEXT: vor.vv v8, v8, v16
3034 ; RV64-NEXT: vsrl.vi v16, v8, 4
3035 ; RV64-NEXT: vand.vx v8, v8, a0
3036 ; RV64-NEXT: vand.vx v16, v16, a0
3037 ; RV64-NEXT: vsll.vi v8, v8, 4
3038 ; RV64-NEXT: vor.vv v8, v16, v8
3039 ; RV64-NEXT: vsrl.vi v16, v8, 2
3040 ; RV64-NEXT: vand.vx v8, v8, a1
3041 ; RV64-NEXT: vand.vx v16, v16, a1
3042 ; RV64-NEXT: vsll.vi v8, v8, 2
3043 ; RV64-NEXT: vor.vv v8, v16, v8
3044 ; RV64-NEXT: vsrl.vi v16, v8, 1
3045 ; RV64-NEXT: vand.vx v8, v8, a2
3046 ; RV64-NEXT: vand.vx v16, v16, a2
3047 ; RV64-NEXT: vadd.vv v8, v8, v8
3048 ; RV64-NEXT: vor.vv v8, v16, v8
3049 ; RV64-NEXT: csrr a0, vlenb
3050 ; RV64-NEXT: slli a0, a0, 3
3051 ; RV64-NEXT: add sp, sp, a0
3052 ; RV64-NEXT: .cfi_def_cfa sp, 16
3053 ; RV64-NEXT: addi sp, sp, 16
3054 ; RV64-NEXT: .cfi_def_cfa_offset 0
3055 ; RV64-NEXT: ret
3056 ;
3057 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i64_unmasked:
3058 ; CHECK-ZVBB: # %bb.0:
3059 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
3060 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
3061 ; CHECK-ZVBB-NEXT: ret
3062 %v = call <vscale x 8 x i64> @llvm.vp.bitreverse.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
3063 ret <vscale x 8 x i64> %v
3064 }
3066 ; Test splitting. Use the i16 version for an easier check.
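; A rough sketch of the split lowering exercised below (register choices and
; scheduling are illustrative, read off the current CHECK lines, not
; guaranteed): the nxv64i16 operand occupies two LMUL=8 register groups
; (v8 and v16), so the VP bitreverse is emitted once per half. With
; vlmax = 4 * vlenb e16 elements per half, the high half's EVL is formed
; branchlessly by the sub/sltu/addi/and run as
;   hi_evl = (evl < vlmax) ? 0 : evl - vlmax
; while the low half clamps evl to vlmax via the bltu/mv pair, and the mask
; (when present) is split with vslidedown.vx by vlenb/2 at e8.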
3067 declare <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16>, <vscale x 64 x i1>, i32)
3069 define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
3070 ; CHECK-LABEL: vp_bitreverse_nxv64i16:
3071 ; CHECK: # %bb.0:
3072 ; CHECK-NEXT: addi sp, sp, -16
3073 ; CHECK-NEXT: .cfi_def_cfa_offset 16
3074 ; CHECK-NEXT: csrr a1, vlenb
3075 ; CHECK-NEXT: slli a1, a1, 4
3076 ; CHECK-NEXT: sub sp, sp, a1
3077 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
3078 ; CHECK-NEXT: vmv1r.v v24, v0
3079 ; CHECK-NEXT: csrr a1, vlenb
3080 ; CHECK-NEXT: slli a1, a1, 3
3081 ; CHECK-NEXT: add a1, sp, a1
3082 ; CHECK-NEXT: addi a1, a1, 16
3083 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
3084 ; CHECK-NEXT: csrr a3, vlenb
3085 ; CHECK-NEXT: lui a1, 1
3086 ; CHECK-NEXT: lui a2, 3
3087 ; CHECK-NEXT: srli a4, a3, 1
3088 ; CHECK-NEXT: slli a3, a3, 2
3089 ; CHECK-NEXT: vsetvli a5, zero, e8, m1, ta, ma
3090 ; CHECK-NEXT: vslidedown.vx v0, v0, a4
3091 ; CHECK-NEXT: sub a4, a0, a3
3092 ; CHECK-NEXT: sltu a5, a0, a4
3093 ; CHECK-NEXT: addi a5, a5, -1
3094 ; CHECK-NEXT: and a5, a5, a4
3095 ; CHECK-NEXT: lui a6, 5
3096 ; CHECK-NEXT: addi a4, a1, -241
3097 ; CHECK-NEXT: addi a2, a2, 819
3098 ; CHECK-NEXT: addi a1, a6, 1365
3099 ; CHECK-NEXT: vsetvli zero, a5, e16, m8, ta, ma
3100 ; CHECK-NEXT: vsrl.vi v8, v16, 8, v0.t
3101 ; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t
3102 ; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
3103 ; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
3104 ; CHECK-NEXT: vand.vx v16, v16, a4, v0.t
3105 ; CHECK-NEXT: vand.vx v8, v8, a4, v0.t
3106 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
3107 ; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
3108 ; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
3109 ; CHECK-NEXT: vand.vx v16, v16, a2, v0.t
3110 ; CHECK-NEXT: vand.vx v8, v8, a2, v0.t
3111 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
3112 ; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
3113 ; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
3114 ; CHECK-NEXT: vand.vx v16, v16, a1, v0.t
3115 ; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
3116 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
3117 ; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
3118 ; CHECK-NEXT: addi a5, sp, 16
3119 ; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
3120 ; CHECK-NEXT: bltu a0, a3, .LBB46_2
3121 ; CHECK-NEXT: # %bb.1:
3122 ; CHECK-NEXT: mv a0, a3
3123 ; CHECK-NEXT: .LBB46_2:
3124 ; CHECK-NEXT: vmv1r.v v0, v24
3125 ; CHECK-NEXT: csrr a3, vlenb
3126 ; CHECK-NEXT: slli a3, a3, 3
3127 ; CHECK-NEXT: add a3, sp, a3
3128 ; CHECK-NEXT: addi a3, a3, 16
3129 ; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
3130 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
3131 ; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
3132 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
3133 ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
3134 ; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
3135 ; CHECK-NEXT: vand.vx v16, v16, a4, v0.t
3136 ; CHECK-NEXT: vand.vx v8, v8, a4, v0.t
3137 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
3138 ; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
3139 ; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
3140 ; CHECK-NEXT: vand.vx v16, v16, a2, v0.t
3141 ; CHECK-NEXT: vand.vx v8, v8, a2, v0.t
3142 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
3143 ; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
3144 ; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
3145 ; CHECK-NEXT: vand.vx v16, v16, a1, v0.t
3146 ; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
3147 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
3148 ; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
3149 ; CHECK-NEXT: addi a0, sp, 16
3150 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
3151 ; CHECK-NEXT: csrr a0, vlenb
3152 ; CHECK-NEXT: slli a0, a0, 4
3153 ; CHECK-NEXT: add sp, sp, a0
3154 ; CHECK-NEXT: .cfi_def_cfa sp, 16
3155 ; CHECK-NEXT: addi sp, sp, 16
3156 ; CHECK-NEXT: .cfi_def_cfa_offset 0
3157 ; CHECK-NEXT: ret
3158 ;
3159 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16:
3160 ; CHECK-ZVBB: # %bb.0:
3161 ; CHECK-ZVBB-NEXT: vmv1r.v v24, v0
3162 ; CHECK-ZVBB-NEXT: csrr a1, vlenb
3163 ; CHECK-ZVBB-NEXT: srli a2, a1, 1
3164 ; CHECK-ZVBB-NEXT: slli a1, a1, 2
3165 ; CHECK-ZVBB-NEXT: vsetvli a3, zero, e8, m1, ta, ma
3166 ; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2
3167 ; CHECK-ZVBB-NEXT: sub a2, a0, a1
3168 ; CHECK-ZVBB-NEXT: sltu a3, a0, a2
3169 ; CHECK-ZVBB-NEXT: addi a3, a3, -1
3170 ; CHECK-ZVBB-NEXT: and a2, a3, a2
3171 ; CHECK-ZVBB-NEXT: vsetvli zero, a2, e16, m8, ta, ma
3172 ; CHECK-ZVBB-NEXT: vbrev.v v16, v16, v0.t
3173 ; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB46_2
3174 ; CHECK-ZVBB-NEXT: # %bb.1:
3175 ; CHECK-ZVBB-NEXT: mv a0, a1
3176 ; CHECK-ZVBB-NEXT: .LBB46_2:
3177 ; CHECK-ZVBB-NEXT: vmv1r.v v0, v24
3178 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
3179 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
3180 ; CHECK-ZVBB-NEXT: ret
3181 %v = call <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 %evl)
3182 ret <vscale x 64 x i16> %v
3183 }
3185 define <vscale x 64 x i16> @vp_bitreverse_nxv64i16_unmasked(<vscale x 64 x i16> %va, i32 zeroext %evl) {
3186 ; CHECK-LABEL: vp_bitreverse_nxv64i16_unmasked:
3187 ; CHECK: # %bb.0:
3188 ; CHECK-NEXT: csrr a3, vlenb
3189 ; CHECK-NEXT: lui a1, 1
3190 ; CHECK-NEXT: lui a2, 3
3191 ; CHECK-NEXT: slli a3, a3, 2
3192 ; CHECK-NEXT: sub a4, a0, a3
3193 ; CHECK-NEXT: sltu a5, a0, a4
3194 ; CHECK-NEXT: addi a5, a5, -1
3195 ; CHECK-NEXT: and a5, a5, a4
3196 ; CHECK-NEXT: lui a6, 5
3197 ; CHECK-NEXT: addi a4, a1, -241
3198 ; CHECK-NEXT: addi a2, a2, 819
3199 ; CHECK-NEXT: addi a1, a6, 1365
3200 ; CHECK-NEXT: vsetvli zero, a5, e16, m8, ta, ma
3201 ; CHECK-NEXT: vsrl.vi v24, v16, 8
3202 ; CHECK-NEXT: vsll.vi v16, v16, 8
3203 ; CHECK-NEXT: vor.vv v16, v16, v24
3204 ; CHECK-NEXT: vsrl.vi v24, v16, 4
3205 ; CHECK-NEXT: vand.vx v16, v16, a4
3206 ; CHECK-NEXT: vand.vx v24, v24, a4
3207 ; CHECK-NEXT: vsll.vi v16, v16, 4
3208 ; CHECK-NEXT: vor.vv v16, v24, v16
3209 ; CHECK-NEXT: vsrl.vi v24, v16, 2
3210 ; CHECK-NEXT: vand.vx v16, v16, a2
3211 ; CHECK-NEXT: vand.vx v24, v24, a2
3212 ; CHECK-NEXT: vsll.vi v16, v16, 2
3213 ; CHECK-NEXT: vor.vv v16, v24, v16
3214 ; CHECK-NEXT: vsrl.vi v24, v16, 1
3215 ; CHECK-NEXT: vand.vx v16, v16, a1
3216 ; CHECK-NEXT: vand.vx v24, v24, a1
3217 ; CHECK-NEXT: vadd.vv v16, v16, v16
3218 ; CHECK-NEXT: vor.vv v16, v24, v16
3219 ; CHECK-NEXT: bltu a0, a3, .LBB47_2
3220 ; CHECK-NEXT: # %bb.1:
3221 ; CHECK-NEXT: mv a0, a3
3222 ; CHECK-NEXT: .LBB47_2:
3223 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
3224 ; CHECK-NEXT: vsrl.vi v24, v8, 8
3225 ; CHECK-NEXT: vsll.vi v8, v8, 8
3226 ; CHECK-NEXT: vor.vv v8, v8, v24
3227 ; CHECK-NEXT: vsrl.vi v24, v8, 4
3228 ; CHECK-NEXT: vand.vx v8, v8, a4
3229 ; CHECK-NEXT: vand.vx v24, v24, a4
3230 ; CHECK-NEXT: vsll.vi v8, v8, 4
3231 ; CHECK-NEXT: vor.vv v8, v24, v8
3232 ; CHECK-NEXT: vsrl.vi v24, v8, 2
3233 ; CHECK-NEXT: vand.vx v8, v8, a2
3234 ; CHECK-NEXT: vand.vx v24, v24, a2
3235 ; CHECK-NEXT: vsll.vi v8, v8, 2
3236 ; CHECK-NEXT: vor.vv v8, v24, v8
3237 ; CHECK-NEXT: vsrl.vi v24, v8, 1
3238 ; CHECK-NEXT: vand.vx v8, v8, a1
3239 ; CHECK-NEXT: vand.vx v24, v24, a1
3240 ; CHECK-NEXT: vadd.vv v8, v8, v8
3241 ; CHECK-NEXT: vor.vv v8, v24, v8
3242 ; CHECK-NEXT: ret
3243 ;
3244 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16_unmasked:
3245 ; CHECK-ZVBB: # %bb.0:
3246 ; CHECK-ZVBB-NEXT: csrr a1, vlenb
3247 ; CHECK-ZVBB-NEXT: slli a1, a1, 2
3248 ; CHECK-ZVBB-NEXT: sub a2, a0, a1
3249 ; CHECK-ZVBB-NEXT: sltu a3, a0, a2
3250 ; CHECK-ZVBB-NEXT: addi a3, a3, -1
3251 ; CHECK-ZVBB-NEXT: and a2, a3, a2
3252 ; CHECK-ZVBB-NEXT: vsetvli zero, a2, e16, m8, ta, ma
3253 ; CHECK-ZVBB-NEXT: vbrev.v v16, v16
3254 ; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB47_2
3255 ; CHECK-ZVBB-NEXT: # %bb.1:
3256 ; CHECK-ZVBB-NEXT: mv a0, a1
3257 ; CHECK-ZVBB-NEXT: .LBB47_2:
3258 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
3259 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8
3260 ; CHECK-ZVBB-NEXT: ret
3261 %v = call <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> splat (i1 true), i32 %evl)
3262 ret <vscale x 64 x i16> %v
3263 }
3266 declare <vscale x 1 x i9> @llvm.vp.bitreverse.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i1>, i32)
3267 define <vscale x 1 x i9> @vp_bitreverse_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
3268 ; CHECK-LABEL: vp_bitreverse_nxv1i9:
3269 ; CHECK: # %bb.0:
3270 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
3271 ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
3272 ; CHECK-NEXT: lui a0, 1
3273 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
3274 ; CHECK-NEXT: addi a0, a0, -241
3275 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
3276 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
3277 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
3278 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
3279 ; CHECK-NEXT: lui a0, 3
3280 ; CHECK-NEXT: addi a0, a0, 819
3281 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
3282 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
3283 ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
3284 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
3285 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
3286 ; CHECK-NEXT: lui a0, 5
3287 ; CHECK-NEXT: addi a0, a0, 1365
3288 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
3289 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
3290 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
3291 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
3292 ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
3293 ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
3294 ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
3295 ; CHECK-NEXT: vsrl.vi v8, v8, 7, v0.t
3296 ; CHECK-NEXT: ret
3297 ;
3298 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i9:
3299 ; CHECK-ZVBB: # %bb.0:
3300 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
3301 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
3302 ; CHECK-ZVBB-NEXT: vsrl.vi v8, v8, 7, v0.t
3303 ; CHECK-ZVBB-NEXT: ret
3304 %v = call <vscale x 1 x i9> @llvm.vp.bitreverse.nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 %evl)
3305 ret <vscale x 1 x i9> %v