; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
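
; These tests cover lowering of llvm.bitreverse on fixed-length vectors.
; Without Zvbb the intrinsic is expanded: bytes are swapped with shifts and
; ors, then nibbles, bit pairs, and single bits are swapped with shift/and/or
; using the 0x0F0F..., 0x3333... and 0x5555... masks (for e16 elements:
; lui 1 + addi -241 = 0x0F0F, lui 3 + addi 819 = 0x3333,
; lui 5 + addi 1365 = 0x5555).  With Zvbb the whole operation lowers to a
; single vbrev.v.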

define void @bitreverse_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: bitreverse_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a1, 1
; CHECK-NEXT: addi a1, a1, -241
; CHECK-NEXT: vsrl.vi v9, v8, 8
; CHECK-NEXT: vsll.vi v8, v8, 8
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 4
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v9, v9, a1
; CHECK-NEXT: lui a1, 3
; CHECK-NEXT: addi a1, a1, 819
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 2
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v9, v9, a1
; CHECK-NEXT: lui a1, 5
; CHECK-NEXT: addi a1, a1, 1365
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v9, v9, a1
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v8i16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse16.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
  store <8 x i16> %c, ptr %x
  ret void
}
declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)
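
; For e32 elements the same expansion is used; the 32-bit masks are
; materialized with lui/addi pairs, e.g. lui 16 + addi -256 = 0xFF00 for the
; byte swap, lui 61681 + addi -241 = 0x0F0F0F0F,
; lui 209715 + addi 819 = 0x33333333, and lui 349525 + addi 1365 = 0x55555555.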

define void @bitreverse_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: bitreverse_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: lui a1, 16
; CHECK-NEXT: addi a1, a1, -256
; CHECK-NEXT: vsrl.vi v9, v8, 8
; CHECK-NEXT: vsrl.vi v10, v8, 24
; CHECK-NEXT: vand.vx v9, v9, a1
; CHECK-NEXT: vor.vv v9, v9, v10
; CHECK-NEXT: vand.vx v10, v8, a1
; CHECK-NEXT: lui a1, 61681
; CHECK-NEXT: addi a1, a1, -241
; CHECK-NEXT: vsll.vi v8, v8, 24
; CHECK-NEXT: vsll.vi v10, v10, 8
; CHECK-NEXT: vor.vv v8, v8, v10
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 4
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v9, v9, a1
; CHECK-NEXT: lui a1, 209715
; CHECK-NEXT: addi a1, a1, 819
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 2
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v9, v9, a1
; CHECK-NEXT: lui a1, 349525
; CHECK-NEXT: addi a1, a1, 1365
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v9, v9, a1
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v4i32:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT: vle32.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse32.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
  store <4 x i32> %c, ptr %x
  ret void
}
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
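
; For e64 elements the 64-bit masks do not fit in a scalar register on RV32,
; so the byte-swap mask (lui 1044480 = 0xFF000000, zero upper word) is spilled
; to the stack and splatted with vlse64, while the 0x0F0F.../0x3333.../0x5555...
; masks are splatted as 32-bit values with vmv.v.x under a temporary e32
; vsetvli.  RV64 instead builds the full 64-bit constants in scalar registers
; with lui/addiw followed by slli 32 and add.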

define void @bitreverse_v2i64(ptr %x, ptr %y) {
; RV32-LABEL: bitreverse_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: lui a2, 1044480
; RV32-NEXT: li a3, 56
; RV32-NEXT: li a4, 40
; RV32-NEXT: lui a5, 16
; RV32-NEXT: lui a1, 4080
; RV32-NEXT: addi a6, sp, 8
; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a2, a5, -256
; RV32-NEXT: vlse64.v v9, (a6), zero
; RV32-NEXT: vsrl.vx v10, v8, a3
; RV32-NEXT: vsrl.vx v11, v8, a4
; RV32-NEXT: vsrl.vi v12, v8, 24
; RV32-NEXT: vsll.vx v13, v8, a3
; RV32-NEXT: vand.vx v11, v11, a2
; RV32-NEXT: vor.vv v10, v11, v10
; RV32-NEXT: vand.vx v11, v8, a2
; RV32-NEXT: vsll.vx v11, v11, a4
; RV32-NEXT: vor.vv v11, v13, v11
; RV32-NEXT: vsrl.vi v13, v8, 8
; RV32-NEXT: vand.vx v12, v12, a1
; RV32-NEXT: vand.vv v13, v13, v9
; RV32-NEXT: vor.vv v12, v13, v12
; RV32-NEXT: lui a2, 61681
; RV32-NEXT: lui a3, 209715
; RV32-NEXT: lui a4, 349525
; RV32-NEXT: addi a2, a2, -241
; RV32-NEXT: addi a3, a3, 819
; RV32-NEXT: addi a4, a4, 1365
; RV32-NEXT: vor.vv v10, v12, v10
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v12, a2
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v8, v9
; RV32-NEXT: vand.vx v8, v8, a1
; RV32-NEXT: vsll.vi v8, v8, 24
; RV32-NEXT: vsll.vi v9, v9, 8
; RV32-NEXT: vor.vv v8, v8, v9
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v9, a3
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vor.vv v8, v11, v8
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v11, a4
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vor.vv v8, v8, v10
; RV32-NEXT: vsrl.vi v10, v8, 4
; RV32-NEXT: vand.vv v8, v8, v12
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vsll.vi v8, v8, 4
; RV32-NEXT: vor.vv v8, v10, v8
; RV32-NEXT: vsrl.vi v10, v8, 2
; RV32-NEXT: vand.vv v8, v8, v9
; RV32-NEXT: vand.vv v9, v10, v9
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vor.vv v8, v9, v8
; RV32-NEXT: vsrl.vi v9, v8, 1
; RV32-NEXT: vand.vv v8, v8, v11
; RV32-NEXT: vand.vv v9, v9, v11
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vor.vv v8, v9, v8
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: bitreverse_v2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: li a1, 56
; RV64-NEXT: li a2, 40
; RV64-NEXT: lui a3, 16
; RV64-NEXT: lui a4, 4080
; RV64-NEXT: li a5, 255
; RV64-NEXT: addiw a3, a3, -256
; RV64-NEXT: slli a5, a5, 24
; RV64-NEXT: vsrl.vx v9, v8, a1
; RV64-NEXT: vsrl.vx v10, v8, a2
; RV64-NEXT: vsrl.vi v11, v8, 24
; RV64-NEXT: vsrl.vi v12, v8, 8
; RV64-NEXT: vand.vx v10, v10, a3
; RV64-NEXT: vor.vv v9, v10, v9
; RV64-NEXT: vand.vx v10, v8, a5
; RV64-NEXT: vand.vx v11, v11, a4
; RV64-NEXT: vand.vx v12, v12, a5
; RV64-NEXT: vor.vv v11, v12, v11
; RV64-NEXT: vand.vx v12, v8, a4
; RV64-NEXT: vsll.vi v10, v10, 8
; RV64-NEXT: vsll.vi v12, v12, 24
; RV64-NEXT: vor.vv v10, v12, v10
; RV64-NEXT: vsll.vx v12, v8, a1
; RV64-NEXT: vand.vx v8, v8, a3
; RV64-NEXT: vsll.vx v8, v8, a2
; RV64-NEXT: vor.vv v8, v12, v8
; RV64-NEXT: lui a1, 61681
; RV64-NEXT: lui a2, 209715
; RV64-NEXT: lui a3, 349525
; RV64-NEXT: addiw a1, a1, -241
; RV64-NEXT: addiw a2, a2, 819
; RV64-NEXT: addiw a3, a3, 1365
; RV64-NEXT: slli a4, a1, 32
; RV64-NEXT: slli a5, a2, 32
; RV64-NEXT: add a1, a1, a4
; RV64-NEXT: slli a4, a3, 32
; RV64-NEXT: add a2, a2, a5
; RV64-NEXT: add a3, a3, a4
; RV64-NEXT: vor.vv v9, v11, v9
; RV64-NEXT: vor.vv v8, v8, v10
; RV64-NEXT: vor.vv v8, v8, v9
; RV64-NEXT: vsrl.vi v9, v8, 4
; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: vand.vx v9, v9, a1
; RV64-NEXT: vsll.vi v8, v8, 4
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vsrl.vi v9, v8, 2
; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vand.vx v9, v9, a2
; RV64-NEXT: vsll.vi v8, v8, 2
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vsrl.vi v9, v8, 1
; RV64-NEXT: vand.vx v8, v8, a3
; RV64-NEXT: vand.vx v9, v9, a3
; RV64-NEXT: vadd.vv v8, v8, v8
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v2i64:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; ZVBB-NEXT: vle64.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse64.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a)
  store <2 x i64> %c, ptr %x
  ret void
}
declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>)
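
; The remaining tests repeat the same patterns with 256-bit vectors, which
; are lowered using LMUL=2 (m2) register groups.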

define void @bitreverse_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: bitreverse_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a1, 1
; CHECK-NEXT: addi a1, a1, -241
; CHECK-NEXT: vsrl.vi v10, v8, 8
; CHECK-NEXT: vsll.vi v8, v8, 8
; CHECK-NEXT: vor.vv v8, v8, v10
; CHECK-NEXT: vsrl.vi v10, v8, 4
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v10, v10, a1
; CHECK-NEXT: lui a1, 3
; CHECK-NEXT: addi a1, a1, 819
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 2
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v10, v10, a1
; CHECK-NEXT: lui a1, 5
; CHECK-NEXT: addi a1, a1, 1365
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 1
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v10, v10, a1
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v16i16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse16.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
  store <16 x i16> %c, ptr %x
  ret void
}
declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)

define void @bitreverse_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: bitreverse_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: lui a1, 16
; CHECK-NEXT: addi a1, a1, -256
; CHECK-NEXT: vsrl.vi v10, v8, 8
; CHECK-NEXT: vsrl.vi v12, v8, 24
; CHECK-NEXT: vand.vx v10, v10, a1
; CHECK-NEXT: vor.vv v10, v10, v12
; CHECK-NEXT: vand.vx v12, v8, a1
; CHECK-NEXT: lui a1, 61681
; CHECK-NEXT: addi a1, a1, -241
; CHECK-NEXT: vsll.vi v8, v8, 24
; CHECK-NEXT: vsll.vi v12, v12, 8
; CHECK-NEXT: vor.vv v8, v8, v12
; CHECK-NEXT: vor.vv v8, v8, v10
; CHECK-NEXT: vsrl.vi v10, v8, 4
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v10, v10, a1
; CHECK-NEXT: lui a1, 209715
; CHECK-NEXT: addi a1, a1, 819
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 2
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v10, v10, a1
; CHECK-NEXT: lui a1, 349525
; CHECK-NEXT: addi a1, a1, 1365
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 1
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v10, v10, a1
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v8i32:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT: vle32.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse32.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
  store <8 x i32> %c, ptr %x
  ret void
}
declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)

define void @bitreverse_v4i64(ptr %x, ptr %y) {
; RV32-LABEL: bitreverse_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: lui a2, 1044480
; RV32-NEXT: li a3, 56
; RV32-NEXT: li a4, 40
; RV32-NEXT: lui a5, 16
; RV32-NEXT: lui a1, 4080
; RV32-NEXT: addi a6, sp, 8
; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a2, a5, -256
; RV32-NEXT: vlse64.v v10, (a6), zero
; RV32-NEXT: vsrl.vx v12, v8, a3
; RV32-NEXT: vsrl.vx v14, v8, a4
; RV32-NEXT: vsrl.vi v16, v8, 24
; RV32-NEXT: vsll.vx v18, v8, a3
; RV32-NEXT: vand.vx v14, v14, a2
; RV32-NEXT: vor.vv v14, v14, v12
; RV32-NEXT: vand.vx v12, v8, a2
; RV32-NEXT: vsll.vx v12, v12, a4
; RV32-NEXT: vor.vv v12, v18, v12
; RV32-NEXT: vsrl.vi v18, v8, 8
; RV32-NEXT: vand.vx v16, v16, a1
; RV32-NEXT: vand.vv v18, v18, v10
; RV32-NEXT: vor.vv v16, v18, v16
; RV32-NEXT: lui a2, 61681
; RV32-NEXT: lui a3, 209715
; RV32-NEXT: lui a4, 349525
; RV32-NEXT: addi a2, a2, -241
; RV32-NEXT: addi a3, a3, 819
; RV32-NEXT: addi a4, a4, 1365
; RV32-NEXT: vor.vv v14, v16, v14
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vmv.v.x v16, a2
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v8, v10
; RV32-NEXT: vand.vx v8, v8, a1
; RV32-NEXT: vsll.vi v8, v8, 24
; RV32-NEXT: vsll.vi v10, v10, 8
; RV32-NEXT: vor.vv v8, v8, v10
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vmv.v.x v10, a3
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vor.vv v8, v12, v8
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vmv.v.x v12, a4
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vor.vv v8, v8, v14
; RV32-NEXT: vsrl.vi v14, v8, 4
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vand.vv v14, v14, v16
; RV32-NEXT: vsll.vi v8, v8, 4
; RV32-NEXT: vor.vv v8, v14, v8
; RV32-NEXT: vsrl.vi v14, v8, 2
; RV32-NEXT: vand.vv v8, v8, v10
; RV32-NEXT: vand.vv v10, v14, v10
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vor.vv v8, v10, v8
; RV32-NEXT: vsrl.vi v10, v8, 1
; RV32-NEXT: vand.vv v8, v8, v12
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vor.vv v8, v10, v8
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: bitreverse_v4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vle64.v v14, (a0)
; RV64-NEXT: li a1, 56
; RV64-NEXT: li a2, 40
; RV64-NEXT: lui a3, 16
; RV64-NEXT: lui a4, 4080
; RV64-NEXT: li a5, 255
; RV64-NEXT: addiw a3, a3, -256
; RV64-NEXT: slli a5, a5, 24
; RV64-NEXT: vsrl.vx v8, v14, a1
; RV64-NEXT: vsrl.vx v10, v14, a2
; RV64-NEXT: vsrl.vi v12, v14, 24
; RV64-NEXT: vsrl.vi v16, v14, 8
; RV64-NEXT: vand.vx v10, v10, a3
; RV64-NEXT: vor.vv v8, v10, v8
; RV64-NEXT: vand.vx v18, v14, a5
; RV64-NEXT: vand.vx v10, v12, a4
; RV64-NEXT: vand.vx v12, v16, a5
; RV64-NEXT: vor.vv v10, v12, v10
; RV64-NEXT: vand.vx v12, v14, a4
; RV64-NEXT: vsll.vi v16, v18, 8
; RV64-NEXT: vsll.vi v12, v12, 24
; RV64-NEXT: vor.vv v12, v12, v16
; RV64-NEXT: vsll.vx v16, v14, a1
; RV64-NEXT: vand.vx v14, v14, a3
; RV64-NEXT: vsll.vx v14, v14, a2
; RV64-NEXT: vor.vv v14, v16, v14
; RV64-NEXT: lui a1, 61681
; RV64-NEXT: lui a2, 209715
; RV64-NEXT: lui a3, 349525
; RV64-NEXT: addiw a1, a1, -241
; RV64-NEXT: addiw a2, a2, 819
; RV64-NEXT: addiw a3, a3, 1365
; RV64-NEXT: slli a4, a1, 32
; RV64-NEXT: slli a5, a2, 32
; RV64-NEXT: add a1, a1, a4
; RV64-NEXT: slli a4, a3, 32
; RV64-NEXT: add a2, a2, a5
; RV64-NEXT: add a3, a3, a4
; RV64-NEXT: vor.vv v8, v10, v8
; RV64-NEXT: vor.vv v10, v14, v12
; RV64-NEXT: vor.vv v8, v10, v8
; RV64-NEXT: vsrl.vi v10, v8, 4
; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: vand.vx v10, v10, a1
; RV64-NEXT: vsll.vi v8, v8, 4
; RV64-NEXT: vor.vv v8, v10, v8
; RV64-NEXT: vsrl.vi v10, v8, 2
; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vand.vx v10, v10, a2
; RV64-NEXT: vsll.vi v8, v8, 2
; RV64-NEXT: vor.vv v8, v10, v8
; RV64-NEXT: vsrl.vi v10, v8, 1
; RV64-NEXT: vand.vx v8, v8, a3
; RV64-NEXT: vand.vx v10, v10, a3
; RV64-NEXT: vadd.vv v8, v8, v8
; RV64-NEXT: vor.vv v8, v10, v8
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v4i64:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT: vle64.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse64.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
  store <4 x i64> %c, ptr %x
  ret void
}
declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>)