; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB
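
; Without Zvkb, llvm.bswap.* on scalable vectors is expanded into
; shift/mask/or sequences; with Zvkb it selects to a single byte-reverse
; (vrev8.v).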
define <vscale x 1 x i16> @bswap_nxv1i16(<vscale x 1 x i16> %va) {
; CHECK-LABEL: bswap_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv1i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16> %va)
  ret <vscale x 1 x i16> %a
}
declare <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16>)

define <vscale x 2 x i16> @bswap_nxv2i16(<vscale x 2 x i16> %va) {
; CHECK-LABEL: bswap_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv2i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16> %va)
  ret <vscale x 2 x i16> %a
}
declare <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16>)

define <vscale x 4 x i16> @bswap_nxv4i16(<vscale x 4 x i16> %va) {
; CHECK-LABEL: bswap_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv4i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16> %va)
  ret <vscale x 4 x i16> %a
}
declare <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16>)

define <vscale x 8 x i16> @bswap_nxv8i16(<vscale x 8 x i16> %va) {
; CHECK-LABEL: bswap_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv8i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16> %va)
  ret <vscale x 8 x i16> %a
}
declare <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16>)

define <vscale x 16 x i16> @bswap_nxv16i16(<vscale x 16 x i16> %va) {
; CHECK-LABEL: bswap_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv16i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16> %va)
  ret <vscale x 16 x i16> %a
}
declare <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16>)

define <vscale x 32 x i16> @bswap_nxv32i16(<vscale x 32 x i16> %va) {
; CHECK-LABEL: bswap_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv32i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 32 x i16> @llvm.bswap.nxv32i16(<vscale x 32 x i16> %va)
  ret <vscale x 32 x i16> %a
}
declare <vscale x 32 x i16> @llvm.bswap.nxv32i16(<vscale x 32 x i16>)
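
; For i32 elements the expansion swaps the outer bytes with 24-bit shifts
; and the inner bytes with 8-bit shifts masked by 0xff00 (lui 16 /
; addi -256).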
define <vscale x 1 x i32> @bswap_nxv1i32(<vscale x 1 x i32> %va) {
; CHECK-LABEL: bswap_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsll.vi v10, v10, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv1i32:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32> %va)
  ret <vscale x 1 x i32> %a
}
declare <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32>)

define <vscale x 2 x i32> @bswap_nxv2i32(<vscale x 2 x i32> %va) {
; CHECK-LABEL: bswap_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsll.vi v10, v10, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv2i32:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32> %va)
  ret <vscale x 2 x i32> %a
}
declare <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32>)

define <vscale x 4 x i32> @bswap_nxv4i32(<vscale x 4 x i32> %va) {
; CHECK-LABEL: bswap_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v12, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vor.vv v10, v10, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsll.vi v12, v12, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv4i32:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32> %va)
  ret <vscale x 4 x i32> %a
}
declare <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32>)

define <vscale x 8 x i32> @bswap_nxv8i32(<vscale x 8 x i32> %va) {
; CHECK-LABEL: bswap_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v16, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vor.vv v12, v12, v16
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsll.vi v16, v16, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv8i32:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32> %va)
  ret <vscale x 8 x i32> %a
}
declare <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32>)

define <vscale x 16 x i32> @bswap_nxv16i32(<vscale x 16 x i32> %va) {
; CHECK-LABEL: bswap_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v24, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vor.vv v16, v16, v24
; CHECK-NEXT:    vand.vx v24, v8, a0
; CHECK-NEXT:    vsll.vi v24, v24, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v24
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv16i32:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32> %va)
  ret <vscale x 16 x i32> %a
}
declare <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32>)
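
; For i64 elements RV64 keeps each byte mask in a scalar register, while
; RV32 cannot hold a 64-bit mask in one scalar register, so it stores
; 0xff000000 to the stack and splats it with a zero-strided vlse64.v.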
define <vscale x 1 x i64> @bswap_nxv1i64(<vscale x 1 x i64> %va) {
; RV32-LABEL: bswap_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v10, v8, a1
; RV32-NEXT:    vsrl.vx v11, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v12, v8, a1
; RV32-NEXT:    vand.vx v11, v11, a0
; RV32-NEXT:    vlse64.v v13, (a5), zero
; RV32-NEXT:    vor.vv v10, v11, v10
; RV32-NEXT:    vand.vx v11, v8, a0
; RV32-NEXT:    vsll.vx v11, v11, a2
; RV32-NEXT:    vor.vv v11, v12, v11
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vand.vx v9, v9, a4
; RV32-NEXT:    vand.vv v12, v12, v13
; RV32-NEXT:    vor.vv v9, v12, v9
; RV32-NEXT:    vand.vv v12, v8, v13
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v12, v12, 8
; RV32-NEXT:    vor.vv v9, v9, v10
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vor.vv v8, v11, v8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v10, v8, a0
; RV64-NEXT:    vsrl.vx v11, v8, a1
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v11, v11, a2
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vsrl.vi v11, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v9, v9, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v11, v11, a4
; RV64-NEXT:    vor.vv v9, v11, v9
; RV64-NEXT:    vand.vx v11, v8, a3
; RV64-NEXT:    vsll.vi v11, v11, 24
; RV64-NEXT:    vor.vv v9, v9, v10
; RV64-NEXT:    vand.vx v10, v8, a4
; RV64-NEXT:    vsll.vi v10, v10, 8
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vsll.vx v11, v8, a0
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    vor.vv v8, v11, v8
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv1i64:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64> %va)
  ret <vscale x 1 x i64> %a
}
declare <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64>)

define <vscale x 2 x i64> @bswap_nxv2i64(<vscale x 2 x i64> %va) {
; RV32-LABEL: bswap_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v10, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v12, v8, a1
; RV32-NEXT:    vsrl.vx v14, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v16, v8, a1
; RV32-NEXT:    vand.vx v14, v14, a0
; RV32-NEXT:    vlse64.v v18, (a5), zero
; RV32-NEXT:    vor.vv v12, v14, v12
; RV32-NEXT:    vand.vx v14, v8, a0
; RV32-NEXT:    vsll.vx v14, v14, a2
; RV32-NEXT:    vor.vv v14, v16, v14
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vand.vx v10, v10, a4
; RV32-NEXT:    vand.vv v16, v16, v18
; RV32-NEXT:    vor.vv v10, v16, v10
; RV32-NEXT:    vand.vv v16, v8, v18
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v16, v16, 8
; RV32-NEXT:    vor.vv v10, v10, v12
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vor.vv v8, v14, v8
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v12, v8, a0
; RV64-NEXT:    vsrl.vx v14, v8, a1
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v14, v14, a2
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vsrl.vi v14, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v10, v10, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v14, v14, a4
; RV64-NEXT:    vor.vv v10, v14, v10
; RV64-NEXT:    vand.vx v14, v8, a3
; RV64-NEXT:    vsll.vi v14, v14, 24
; RV64-NEXT:    vor.vv v10, v10, v12
; RV64-NEXT:    vand.vx v12, v8, a4
; RV64-NEXT:    vsll.vi v12, v12, 8
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vsll.vx v14, v8, a0
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    vor.vv v8, v14, v8
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv2i64:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64> %va)
  ret <vscale x 2 x i64> %a
}
declare <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64>)

define <vscale x 4 x i64> @bswap_nxv4i64(<vscale x 4 x i64> %va) {
; RV32-LABEL: bswap_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v16, v8, a1
; RV32-NEXT:    vsrl.vx v20, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v24, v8, a1
; RV32-NEXT:    vand.vx v20, v20, a0
; RV32-NEXT:    vlse64.v v28, (a5), zero
; RV32-NEXT:    vor.vv v16, v20, v16
; RV32-NEXT:    vand.vx v20, v8, a0
; RV32-NEXT:    vsll.vx v20, v20, a2
; RV32-NEXT:    vor.vv v20, v24, v20
; RV32-NEXT:    vsrl.vi v24, v8, 8
; RV32-NEXT:    vand.vx v12, v12, a4
; RV32-NEXT:    vand.vv v24, v24, v28
; RV32-NEXT:    vor.vv v12, v24, v12
; RV32-NEXT:    vand.vv v24, v8, v28
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v24, v24, 8
; RV32-NEXT:    vor.vv v12, v12, v16
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vor.vv v8, v20, v8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v16, v8, a0
; RV64-NEXT:    vsrl.vx v20, v8, a1
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v20, v20, a2
; RV64-NEXT:    vor.vv v16, v20, v16
; RV64-NEXT:    vsrl.vi v20, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v12, v12, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v20, v20, a4
; RV64-NEXT:    vor.vv v12, v20, v12
; RV64-NEXT:    vand.vx v20, v8, a3
; RV64-NEXT:    vsll.vi v20, v20, 24
; RV64-NEXT:    vor.vv v12, v12, v16
; RV64-NEXT:    vand.vx v16, v8, a4
; RV64-NEXT:    vsll.vi v16, v16, 8
; RV64-NEXT:    vor.vv v16, v20, v16
; RV64-NEXT:    vsll.vx v20, v8, a0
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    vor.vv v8, v20, v8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv4i64:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64> %va)
  ret <vscale x 4 x i64> %a
}
declare <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64>)
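
; At LMUL=8 the RV32 expansion runs out of vector registers, so two m8
; intermediates are spilled to the stack and reloaded (the Folded
; Spill/Reload sequences below).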
define <vscale x 8 x i64> @bswap_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: bswap_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v16, v8, a1
; RV32-NEXT:    vsrl.vx v24, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v0, v8, a1
; RV32-NEXT:    vand.vx v24, v24, a0
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vand.vx v16, v8, a0
; RV32-NEXT:    vsll.vx v16, v16, a2
; RV32-NEXT:    vor.vv v16, v0, v16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vlse64.v v0, (a5), zero
; RV32-NEXT:    vsrl.vi v16, v8, 24
; RV32-NEXT:    vand.vx v16, v16, a4
; RV32-NEXT:    vsrl.vi v24, v8, 8
; RV32-NEXT:    vand.vv v24, v24, v0
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v24, v16, v24
; RV32-NEXT:    vand.vv v16, v8, v0
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v16, v16, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v16, v8, a0
; RV64-NEXT:    vsrl.vx v0, v8, a1
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v0, v0, a2
; RV64-NEXT:    vor.vv v16, v0, v16
; RV64-NEXT:    vsrl.vi v0, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v24, v24, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v0, v0, a4
; RV64-NEXT:    vor.vv v24, v0, v24
; RV64-NEXT:    vand.vx v0, v8, a3
; RV64-NEXT:    vsll.vi v0, v0, 24
; RV64-NEXT:    vor.vv v16, v24, v16
; RV64-NEXT:    vand.vx v24, v8, a4
; RV64-NEXT:    vsll.vi v24, v24, 8
; RV64-NEXT:    vor.vv v24, v0, v24
; RV64-NEXT:    vsll.vx v0, v8, a0
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    vor.vv v8, v0, v8
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv8i64:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64> %va)
  ret <vscale x 8 x i64> %a
}
declare <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64>)