; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
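
; Without Zvbb, llvm.bitreverse is expanded into the classic shift-and-mask
; sequence: swap the nibbles, then the 2-bit pairs (mask 51 = 0b00110011),
; then the single bits (mask 85 = 0b01010101); elements wider than a byte are
; byte-reversed first with shifts and masks. With Zvbb, the whole operation
; lowers to a single vbrev.v.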

define <vscale x 1 x i8> @bitreverse_nxv1i8(<vscale x 1 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vsll.vi v9, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv1i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8> %va)
  ret <vscale x 1 x i8> %a
}
declare <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8>)

define <vscale x 2 x i8> @bitreverse_nxv2i8(<vscale x 2 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vsll.vi v9, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv2i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8> %va)
  ret <vscale x 2 x i8> %a
}
declare <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8>)

define <vscale x 4 x i8> @bitreverse_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vsll.vi v9, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv4i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8> %va)
  ret <vscale x 4 x i8> %a
}
declare <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8>)

define <vscale x 8 x i8> @bitreverse_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT:    vsll.vi v9, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv8i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8> %va)
  ret <vscale x 8 x i8> %a
}
declare <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8>)

define <vscale x 16 x i8> @bitreverse_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT:    vsll.vi v10, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv16i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8> %va)
  ret <vscale x 16 x i8> %a
}
declare <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8>)

define <vscale x 32 x i8> @bitreverse_nxv32i8(<vscale x 32 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
; CHECK-NEXT:    vsll.vi v12, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv32i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 32 x i8> @llvm.bitreverse.nxv32i8(<vscale x 32 x i8> %va)
  ret <vscale x 32 x i8> %a
}
declare <vscale x 32 x i8> @llvm.bitreverse.nxv32i8(<vscale x 32 x i8>)

define <vscale x 64 x i8> @bitreverse_nxv64i8(<vscale x 64 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT:    vsll.vi v16, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv64i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 64 x i8> @llvm.bitreverse.nxv64i8(<vscale x 64 x i8> %va)
  ret <vscale x 64 x i8> %a
}
declare <vscale x 64 x i8> @llvm.bitreverse.nxv64i8(<vscale x 64 x i8>)

define <vscale x 1 x i16> @bitreverse_nxv1i16(<vscale x 1 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv1i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16> %va)
  ret <vscale x 1 x i16> %a
}
declare <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16>)

define <vscale x 2 x i16> @bitreverse_nxv2i16(<vscale x 2 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv2i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16> %va)
  ret <vscale x 2 x i16> %a
}
declare <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16>)

define <vscale x 4 x i16> @bitreverse_nxv4i16(<vscale x 4 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv4i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16> %va)
  ret <vscale x 4 x i16> %a
}
declare <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16>)

define <vscale x 8 x i16> @bitreverse_nxv8i16(<vscale x 8 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv8i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16> %va)
  ret <vscale x 8 x i16> %a
}
declare <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16>)

define <vscale x 16 x i16> @bitreverse_nxv16i16(<vscale x 16 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv16i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16> %va)
  ret <vscale x 16 x i16> %a
}
declare <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16>)

define <vscale x 32 x i16> @bitreverse_nxv32i16(<vscale x 32 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv32i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 32 x i16> @llvm.bitreverse.nxv32i16(<vscale x 32 x i16> %va)
  ret <vscale x 32 x i16> %a
}
declare <vscale x 32 x i16> @llvm.bitreverse.nxv32i16(<vscale x 32 x i16>)

define <vscale x 1 x i32> @bitreverse_nxv1i32(<vscale x 1 x i32> %va) {
; CHECK-LABEL: bitreverse_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vsll.vi v10, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv1i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32> %va)
  ret <vscale x 1 x i32> %a
}
declare <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32>)

define <vscale x 2 x i32> @bitreverse_nxv2i32(<vscale x 2 x i32> %va) {
; CHECK-LABEL: bitreverse_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vsll.vi v10, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv2i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32> %va)
  ret <vscale x 2 x i32> %a
}
declare <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32>)

define <vscale x 4 x i32> @bitreverse_nxv4i32(<vscale x 4 x i32> %va) {
; CHECK-LABEL: bitreverse_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v12, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vor.vv v10, v10, v12
; CHECK-NEXT:    vsll.vi v12, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv4i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32> %va)
  ret <vscale x 4 x i32> %a
}
declare <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32>)

define <vscale x 8 x i32> @bitreverse_nxv8i32(<vscale x 8 x i32> %va) {
; CHECK-LABEL: bitreverse_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v16, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vor.vv v12, v12, v16
; CHECK-NEXT:    vsll.vi v16, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv8i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32> %va)
  ret <vscale x 8 x i32> %a
}
declare <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32>)

define <vscale x 16 x i32> @bitreverse_nxv16i32(<vscale x 16 x i32> %va) {
; CHECK-LABEL: bitreverse_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v24, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vor.vv v16, v16, v24
; CHECK-NEXT:    vsll.vi v24, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v24, v8
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv16i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32> %va)
  ret <vscale x 16 x i32> %a
}
declare <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32>)
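
; For 64-bit element types, RV32 has no 64-bit scalar registers for the
; constants, so the expansion below materializes the byte mask on the stack
; (sw + vlse64.v) and splats the nibble/2-bit/1-bit masks with e32 vmv.v.x,
; while RV64 builds each 64-bit mask with lui/addiw/slli/add scalar
; sequences and uses vand.vx directly.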

define <vscale x 1 x i64> @bitreverse_nxv1i64(<vscale x 1 x i64> %va) {
; RV32-LABEL: bitreverse_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v10, v8, a1
; RV32-NEXT:    vsrl.vx v11, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v12, v8, a1
; RV32-NEXT:    vand.vx v11, v11, a0
; RV32-NEXT:    vlse64.v v13, (a5), zero
; RV32-NEXT:    vor.vv v10, v11, v10
; RV32-NEXT:    vand.vx v11, v8, a0
; RV32-NEXT:    vsll.vx v11, v11, a2
; RV32-NEXT:    vor.vv v11, v12, v11
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vand.vx v9, v9, a4
; RV32-NEXT:    vand.vv v12, v12, v13
; RV32-NEXT:    vor.vv v9, v12, v9
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    vand.vv v12, v8, v13
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v9, v9, v10
; RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vor.vv v8, v11, v8
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v11, a2
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vand.vv v9, v9, v12
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    vand.vv v8, v8, v11
; RV32-NEXT:    vand.vv v9, v9, v11
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    li a0, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v10, v8, a1
; RV64-NEXT:    vsrl.vx v11, v8, a0
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v11, v11, a2
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vsrl.vi v11, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v9, v9, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v11, v11, a4
; RV64-NEXT:    vor.vv v9, v11, v9
; RV64-NEXT:    vand.vx v11, v8, a3
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    vor.vv v9, v9, v10
; RV64-NEXT:    vand.vx v10, v8, a4
; RV64-NEXT:    lui a4, 209715
; RV64-NEXT:    vsll.vi v11, v11, 24
; RV64-NEXT:    vsll.vi v10, v10, 8
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vsll.vx v11, v8, a1
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 819
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    slli a2, a3, 32
; RV64-NEXT:    vsll.vx v8, v8, a0
; RV64-NEXT:    slli a0, a4, 32
; RV64-NEXT:    add a2, a3, a2
; RV64-NEXT:    slli a3, a1, 32
; RV64-NEXT:    add a0, a4, a0
; RV64-NEXT:    add a1, a1, a3
; RV64-NEXT:    vor.vv v8, v11, v8
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vand.vx v9, v9, a2
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vand.vx v9, v9, a1
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv1i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64> %va)
  ret <vscale x 1 x i64> %a
}
declare <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64>)

define <vscale x 2 x i64> @bitreverse_nxv2i64(<vscale x 2 x i64> %va) {
; RV32-LABEL: bitreverse_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v10, v8, a1
; RV32-NEXT:    vsrl.vx v12, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v18, v8, a1
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vlse64.v v14, (a5), zero
; RV32-NEXT:    vor.vv v12, v12, v10
; RV32-NEXT:    vand.vx v10, v8, a0
; RV32-NEXT:    vsll.vx v10, v10, a2
; RV32-NEXT:    vor.vv v10, v18, v10
; RV32-NEXT:    vsrl.vi v18, v8, 8
; RV32-NEXT:    vand.vx v16, v16, a4
; RV32-NEXT:    vand.vv v18, v18, v14
; RV32-NEXT:    vor.vv v16, v18, v16
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    vand.vv v14, v8, v14
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v12, v16, v12
; RV32-NEXT:    vsetvli a3, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v16, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vsll.vi v14, v14, 8
; RV32-NEXT:    vor.vv v8, v8, v14
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v14, a1
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a2
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v14
; RV32-NEXT:    vand.vv v12, v12, v14
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vand.vv v10, v12, v10
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    li a0, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v12, v8, a1
; RV64-NEXT:    vsrl.vx v14, v8, a0
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v14, v14, a2
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vsrl.vi v14, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v10, v10, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v14, v14, a4
; RV64-NEXT:    vor.vv v10, v14, v10
; RV64-NEXT:    vand.vx v14, v8, a3
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    vor.vv v10, v10, v12
; RV64-NEXT:    vand.vx v12, v8, a4
; RV64-NEXT:    lui a4, 209715
; RV64-NEXT:    vsll.vi v14, v14, 24
; RV64-NEXT:    vsll.vi v12, v12, 8
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vsll.vx v14, v8, a1
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 819
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    slli a2, a3, 32
; RV64-NEXT:    vsll.vx v8, v8, a0
; RV64-NEXT:    slli a0, a4, 32
; RV64-NEXT:    add a2, a3, a2
; RV64-NEXT:    slli a3, a1, 32
; RV64-NEXT:    add a0, a4, a0
; RV64-NEXT:    add a1, a1, a3
; RV64-NEXT:    vor.vv v8, v14, v8
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vand.vx v10, v10, a2
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vand.vx v10, v10, a1
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv2i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64> %va)
  ret <vscale x 2 x i64> %a
}
declare <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64>)

define <vscale x 4 x i64> @bitreverse_nxv4i64(<vscale x 4 x i64> %va) {
; RV32-LABEL: bitreverse_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v24, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v12, v8, a1
; RV32-NEXT:    vsrl.vx v16, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v28, v8, a1
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vlse64.v v20, (a5), zero
; RV32-NEXT:    vor.vv v16, v16, v12
; RV32-NEXT:    vand.vx v12, v8, a0
; RV32-NEXT:    vsll.vx v12, v12, a2
; RV32-NEXT:    vor.vv v12, v28, v12
; RV32-NEXT:    vsrl.vi v28, v8, 8
; RV32-NEXT:    vand.vx v24, v24, a4
; RV32-NEXT:    vand.vv v28, v28, v20
; RV32-NEXT:    vor.vv v24, v28, v24
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    vand.vv v20, v8, v20
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v24, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vsll.vi v20, v20, 8
; RV32-NEXT:    vor.vv v8, v8, v20
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v20, a1
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a2
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v20
; RV32-NEXT:    vand.vv v16, v16, v20
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v16, v12
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    li a0, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v12, v8, a1
; RV64-NEXT:    vsrl.vx v20, v8, a0
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v20, v20, a2
; RV64-NEXT:    vor.vv v12, v20, v12
; RV64-NEXT:    vsrl.vi v20, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v16, v16, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v20, v20, a4
; RV64-NEXT:    vor.vv v20, v20, v16
; RV64-NEXT:    vand.vx v16, v8, a3
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    vor.vv v12, v20, v12
; RV64-NEXT:    vand.vx v20, v8, a4
; RV64-NEXT:    lui a4, 209715
; RV64-NEXT:    vsll.vi v16, v16, 24
; RV64-NEXT:    vsll.vi v20, v20, 8
; RV64-NEXT:    vor.vv v16, v16, v20
; RV64-NEXT:    vsll.vx v20, v8, a1
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 819
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    slli a2, a3, 32
; RV64-NEXT:    vsll.vx v8, v8, a0
; RV64-NEXT:    slli a0, a4, 32
; RV64-NEXT:    add a2, a3, a2
; RV64-NEXT:    slli a3, a1, 32
; RV64-NEXT:    add a0, a4, a0
; RV64-NEXT:    add a1, a1, a3
; RV64-NEXT:    vor.vv v8, v20, v8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vand.vx v12, v12, a2
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vand.vx v12, v12, a1
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv4i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64> %va)
  ret <vscale x 4 x i64> %a
}
declare <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64>)

define <vscale x 8 x i64> @bitreverse_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: bitreverse_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v16, v8, a1
; RV32-NEXT:    vsrl.vx v24, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v0, v8, a1
; RV32-NEXT:    vand.vx v24, v24, a0
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vand.vx v16, v8, a0
; RV32-NEXT:    vsll.vx v16, v16, a2
; RV32-NEXT:    vor.vv v16, v0, v16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vlse64.v v0, (a5), zero
; RV32-NEXT:    vsrl.vi v16, v8, 24
; RV32-NEXT:    vand.vx v16, v16, a4
; RV32-NEXT:    vsrl.vi v24, v8, 8
; RV32-NEXT:    vand.vv v24, v24, v0
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v24, v16, v24
; RV32-NEXT:    vand.vv v16, v8, v0
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v16, v16, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 16
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsetvli a3, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v16, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vsrl.vi v24, v8, 4
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a1
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a2
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    li a0, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v16, v8, a1
; RV64-NEXT:    vsrl.vx v0, v8, a0
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v0, v0, a2
; RV64-NEXT:    vor.vv v16, v0, v16
; RV64-NEXT:    vsrl.vi v0, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v24, v24, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v0, v0, a4
; RV64-NEXT:    vor.vv v0, v0, v24
; RV64-NEXT:    vand.vx v24, v8, a3
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    vor.vv v16, v0, v16
; RV64-NEXT:    vand.vx v0, v8, a4
; RV64-NEXT:    lui a4, 209715
; RV64-NEXT:    vsll.vi v24, v24, 24
; RV64-NEXT:    vsll.vi v0, v0, 8
; RV64-NEXT:    vor.vv v24, v24, v0
; RV64-NEXT:    vsll.vx v0, v8, a1
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 819
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    slli a2, a3, 32
; RV64-NEXT:    vsll.vx v8, v8, a0
; RV64-NEXT:    slli a0, a4, 32
; RV64-NEXT:    add a2, a3, a2
; RV64-NEXT:    slli a3, a1, 32
; RV64-NEXT:    add a0, a4, a0
; RV64-NEXT:    add a1, a1, a3
; RV64-NEXT:    vor.vv v8, v0, v8
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vand.vx v16, v16, a2
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vand.vx v16, v16, a1
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv8i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64> %va)
  ret <vscale x 8 x i64> %a
}
declare <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64>)