1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
4 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB
5 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB
; Byte-swap of <vscale x 1 x i16> via the llvm.bswap.nxv1i16 intrinsic
; (e16, mf4 per the vsetvli lines below).
; With only +v the expected code swaps the two bytes of each element using
; vsrl.vi 8, vsll.vi 8 and vor.vv; with +zvkb it is a single vrev8.v.
7 define <vscale x 1 x i16> @bswap_nxv1i16(<vscale x 1 x i16> %va) {
8 ; CHECK-LABEL: bswap_nxv1i16:
10 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
11 ; CHECK-NEXT: vsrl.vi v9, v8, 8
12 ; CHECK-NEXT: vsll.vi v8, v8, 8
13 ; CHECK-NEXT: vor.vv v8, v8, v9
16 ; CHECK-ZVKB-LABEL: bswap_nxv1i16:
17 ; CHECK-ZVKB: # %bb.0:
18 ; CHECK-ZVKB-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
19 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
20 ; CHECK-ZVKB-NEXT: ret
21 %a = call <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16> %va)
22 ret <vscale x 1 x i16> %a
24 declare <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16>)
; Byte-swap of <vscale x 2 x i16> via the llvm.bswap.nxv2i16 intrinsic
; (e16, mf2 per the vsetvli lines below).
; With only +v the expected code swaps the two bytes of each element using
; vsrl.vi 8, vsll.vi 8 and vor.vv; with +zvkb it is a single vrev8.v.
26 define <vscale x 2 x i16> @bswap_nxv2i16(<vscale x 2 x i16> %va) {
27 ; CHECK-LABEL: bswap_nxv2i16:
29 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
30 ; CHECK-NEXT: vsrl.vi v9, v8, 8
31 ; CHECK-NEXT: vsll.vi v8, v8, 8
32 ; CHECK-NEXT: vor.vv v8, v8, v9
35 ; CHECK-ZVKB-LABEL: bswap_nxv2i16:
36 ; CHECK-ZVKB: # %bb.0:
37 ; CHECK-ZVKB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
38 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
39 ; CHECK-ZVKB-NEXT: ret
40 %a = call <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16> %va)
41 ret <vscale x 2 x i16> %a
43 declare <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16>)
; Byte-swap of <vscale x 4 x i16> via the llvm.bswap.nxv4i16 intrinsic
; (e16, m1 per the vsetvli lines below).
; With only +v the expected code swaps the two bytes of each element using
; vsrl.vi 8, vsll.vi 8 and vor.vv; with +zvkb it is a single vrev8.v.
45 define <vscale x 4 x i16> @bswap_nxv4i16(<vscale x 4 x i16> %va) {
46 ; CHECK-LABEL: bswap_nxv4i16:
48 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
49 ; CHECK-NEXT: vsrl.vi v9, v8, 8
50 ; CHECK-NEXT: vsll.vi v8, v8, 8
51 ; CHECK-NEXT: vor.vv v8, v8, v9
54 ; CHECK-ZVKB-LABEL: bswap_nxv4i16:
55 ; CHECK-ZVKB: # %bb.0:
56 ; CHECK-ZVKB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
57 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
58 ; CHECK-ZVKB-NEXT: ret
59 %a = call <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16> %va)
60 ret <vscale x 4 x i16> %a
62 declare <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16>)
; Byte-swap of <vscale x 8 x i16> via the llvm.bswap.nxv8i16 intrinsic
; (e16, m2 per the vsetvli lines below; the temporary is v10).
; With only +v the expected code swaps the two bytes of each element using
; vsrl.vi 8, vsll.vi 8 and vor.vv; with +zvkb it is a single vrev8.v.
64 define <vscale x 8 x i16> @bswap_nxv8i16(<vscale x 8 x i16> %va) {
65 ; CHECK-LABEL: bswap_nxv8i16:
67 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
68 ; CHECK-NEXT: vsrl.vi v10, v8, 8
69 ; CHECK-NEXT: vsll.vi v8, v8, 8
70 ; CHECK-NEXT: vor.vv v8, v8, v10
73 ; CHECK-ZVKB-LABEL: bswap_nxv8i16:
74 ; CHECK-ZVKB: # %bb.0:
75 ; CHECK-ZVKB-NEXT: vsetvli a0, zero, e16, m2, ta, ma
76 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
77 ; CHECK-ZVKB-NEXT: ret
78 %a = call <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16> %va)
79 ret <vscale x 8 x i16> %a
81 declare <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16>)
; Byte-swap of <vscale x 16 x i16> via the llvm.bswap.nxv16i16 intrinsic
; (e16, m4 per the vsetvli lines below; the temporary is v12).
; With only +v the expected code swaps the two bytes of each element using
; vsrl.vi 8, vsll.vi 8 and vor.vv; with +zvkb it is a single vrev8.v.
83 define <vscale x 16 x i16> @bswap_nxv16i16(<vscale x 16 x i16> %va) {
84 ; CHECK-LABEL: bswap_nxv16i16:
86 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
87 ; CHECK-NEXT: vsrl.vi v12, v8, 8
88 ; CHECK-NEXT: vsll.vi v8, v8, 8
89 ; CHECK-NEXT: vor.vv v8, v8, v12
92 ; CHECK-ZVKB-LABEL: bswap_nxv16i16:
93 ; CHECK-ZVKB: # %bb.0:
94 ; CHECK-ZVKB-NEXT: vsetvli a0, zero, e16, m4, ta, ma
95 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
96 ; CHECK-ZVKB-NEXT: ret
97 %a = call <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16> %va)
98 ret <vscale x 16 x i16> %a
100 declare <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16>)
; Byte-swap of <vscale x 32 x i16> via the llvm.bswap.nxv32i16 intrinsic
; (e16, m8 per the vsetvli lines below; the temporary is v16).
; With only +v the expected code swaps the two bytes of each element using
; vsrl.vi 8, vsll.vi 8 and vor.vv; with +zvkb it is a single vrev8.v.
102 define <vscale x 32 x i16> @bswap_nxv32i16(<vscale x 32 x i16> %va) {
103 ; CHECK-LABEL: bswap_nxv32i16:
105 ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
106 ; CHECK-NEXT: vsrl.vi v16, v8, 8
107 ; CHECK-NEXT: vsll.vi v8, v8, 8
108 ; CHECK-NEXT: vor.vv v8, v8, v16
111 ; CHECK-ZVKB-LABEL: bswap_nxv32i16:
112 ; CHECK-ZVKB: # %bb.0:
113 ; CHECK-ZVKB-NEXT: vsetvli a0, zero, e16, m8, ta, ma
114 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
115 ; CHECK-ZVKB-NEXT: ret
116 %a = call <vscale x 32 x i16> @llvm.bswap.nxv32i16(<vscale x 32 x i16> %va)
117 ret <vscale x 32 x i16> %a
119 declare <vscale x 32 x i16> @llvm.bswap.nxv32i16(<vscale x 32 x i16>)
; Byte-swap of <vscale x 1 x i32> via the llvm.bswap.nxv1i32 intrinsic
; (e32, mf2 per the vsetvli lines below).
; With only +v the expected expansion builds the mask 0xff00 in a0
; (lui 16; addi -256), then ORs together four pieces: the element shifted
; right by 8 and masked, shifted right by 24, masked and shifted left by 8,
; and shifted left by 24. With +zvkb it is a single vrev8.v.
121 define <vscale x 1 x i32> @bswap_nxv1i32(<vscale x 1 x i32> %va) {
122 ; CHECK-LABEL: bswap_nxv1i32:
124 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
125 ; CHECK-NEXT: vsrl.vi v9, v8, 8
126 ; CHECK-NEXT: lui a0, 16
127 ; CHECK-NEXT: addi a0, a0, -256
128 ; CHECK-NEXT: vand.vx v9, v9, a0
129 ; CHECK-NEXT: vsrl.vi v10, v8, 24
130 ; CHECK-NEXT: vor.vv v9, v9, v10
131 ; CHECK-NEXT: vand.vx v10, v8, a0
132 ; CHECK-NEXT: vsll.vi v10, v10, 8
133 ; CHECK-NEXT: vsll.vi v8, v8, 24
134 ; CHECK-NEXT: vor.vv v8, v8, v10
135 ; CHECK-NEXT: vor.vv v8, v8, v9
138 ; CHECK-ZVKB-LABEL: bswap_nxv1i32:
139 ; CHECK-ZVKB: # %bb.0:
140 ; CHECK-ZVKB-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
141 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
142 ; CHECK-ZVKB-NEXT: ret
143 %a = call <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32> %va)
144 ret <vscale x 1 x i32> %a
146 declare <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32>)
; Byte-swap of <vscale x 2 x i32> via the llvm.bswap.nxv2i32 intrinsic
; (e32, m1 per the vsetvli lines below).
; With only +v the expected expansion builds the mask 0xff00 in a0
; (lui 16; addi -256), then ORs together four pieces: the element shifted
; right by 8 and masked, shifted right by 24, masked and shifted left by 8,
; and shifted left by 24. With +zvkb it is a single vrev8.v.
148 define <vscale x 2 x i32> @bswap_nxv2i32(<vscale x 2 x i32> %va) {
149 ; CHECK-LABEL: bswap_nxv2i32:
151 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
152 ; CHECK-NEXT: vsrl.vi v9, v8, 8
153 ; CHECK-NEXT: lui a0, 16
154 ; CHECK-NEXT: addi a0, a0, -256
155 ; CHECK-NEXT: vand.vx v9, v9, a0
156 ; CHECK-NEXT: vsrl.vi v10, v8, 24
157 ; CHECK-NEXT: vor.vv v9, v9, v10
158 ; CHECK-NEXT: vand.vx v10, v8, a0
159 ; CHECK-NEXT: vsll.vi v10, v10, 8
160 ; CHECK-NEXT: vsll.vi v8, v8, 24
161 ; CHECK-NEXT: vor.vv v8, v8, v10
162 ; CHECK-NEXT: vor.vv v8, v8, v9
165 ; CHECK-ZVKB-LABEL: bswap_nxv2i32:
166 ; CHECK-ZVKB: # %bb.0:
167 ; CHECK-ZVKB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
168 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
169 ; CHECK-ZVKB-NEXT: ret
170 %a = call <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32> %va)
171 ret <vscale x 2 x i32> %a
173 declare <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32>)
; Byte-swap of <vscale x 4 x i32> via the llvm.bswap.nxv4i32 intrinsic
; (e32, m2 per the vsetvli lines below; temporaries are v10 and v12).
; With only +v the expected expansion builds the mask 0xff00 in a0
; (lui 16; addi -256), then ORs together four pieces: the element shifted
; right by 8 and masked, shifted right by 24, masked and shifted left by 8,
; and shifted left by 24. With +zvkb it is a single vrev8.v.
175 define <vscale x 4 x i32> @bswap_nxv4i32(<vscale x 4 x i32> %va) {
176 ; CHECK-LABEL: bswap_nxv4i32:
178 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
179 ; CHECK-NEXT: vsrl.vi v10, v8, 8
180 ; CHECK-NEXT: lui a0, 16
181 ; CHECK-NEXT: addi a0, a0, -256
182 ; CHECK-NEXT: vand.vx v10, v10, a0
183 ; CHECK-NEXT: vsrl.vi v12, v8, 24
184 ; CHECK-NEXT: vor.vv v10, v10, v12
185 ; CHECK-NEXT: vand.vx v12, v8, a0
186 ; CHECK-NEXT: vsll.vi v12, v12, 8
187 ; CHECK-NEXT: vsll.vi v8, v8, 24
188 ; CHECK-NEXT: vor.vv v8, v8, v12
189 ; CHECK-NEXT: vor.vv v8, v8, v10
192 ; CHECK-ZVKB-LABEL: bswap_nxv4i32:
193 ; CHECK-ZVKB: # %bb.0:
194 ; CHECK-ZVKB-NEXT: vsetvli a0, zero, e32, m2, ta, ma
195 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
196 ; CHECK-ZVKB-NEXT: ret
197 %a = call <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32> %va)
198 ret <vscale x 4 x i32> %a
200 declare <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32>)
; Byte-swap of <vscale x 8 x i32> via the llvm.bswap.nxv8i32 intrinsic
; (e32, m4 per the vsetvli lines below; temporaries are v12 and v16).
; With only +v the expected expansion builds the mask 0xff00 in a0
; (lui 16; addi -256), then ORs together four pieces: the element shifted
; right by 8 and masked, shifted right by 24, masked and shifted left by 8,
; and shifted left by 24. With +zvkb it is a single vrev8.v.
202 define <vscale x 8 x i32> @bswap_nxv8i32(<vscale x 8 x i32> %va) {
203 ; CHECK-LABEL: bswap_nxv8i32:
205 ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
206 ; CHECK-NEXT: vsrl.vi v12, v8, 8
207 ; CHECK-NEXT: lui a0, 16
208 ; CHECK-NEXT: addi a0, a0, -256
209 ; CHECK-NEXT: vand.vx v12, v12, a0
210 ; CHECK-NEXT: vsrl.vi v16, v8, 24
211 ; CHECK-NEXT: vor.vv v12, v12, v16
212 ; CHECK-NEXT: vand.vx v16, v8, a0
213 ; CHECK-NEXT: vsll.vi v16, v16, 8
214 ; CHECK-NEXT: vsll.vi v8, v8, 24
215 ; CHECK-NEXT: vor.vv v8, v8, v16
216 ; CHECK-NEXT: vor.vv v8, v8, v12
219 ; CHECK-ZVKB-LABEL: bswap_nxv8i32:
220 ; CHECK-ZVKB: # %bb.0:
221 ; CHECK-ZVKB-NEXT: vsetvli a0, zero, e32, m4, ta, ma
222 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
223 ; CHECK-ZVKB-NEXT: ret
224 %a = call <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32> %va)
225 ret <vscale x 8 x i32> %a
227 declare <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32>)
; Byte-swap of <vscale x 16 x i32> via the llvm.bswap.nxv16i32 intrinsic
; (e32, m8 per the vsetvli lines below; temporaries are v16 and v24).
; With only +v the expected expansion builds the mask 0xff00 in a0
; (lui 16; addi -256), then ORs together four pieces: the element shifted
; right by 8 and masked, shifted right by 24, masked and shifted left by 8,
; and shifted left by 24. With +zvkb it is a single vrev8.v.
229 define <vscale x 16 x i32> @bswap_nxv16i32(<vscale x 16 x i32> %va) {
230 ; CHECK-LABEL: bswap_nxv16i32:
232 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
233 ; CHECK-NEXT: vsrl.vi v16, v8, 8
234 ; CHECK-NEXT: lui a0, 16
235 ; CHECK-NEXT: addi a0, a0, -256
236 ; CHECK-NEXT: vand.vx v16, v16, a0
237 ; CHECK-NEXT: vsrl.vi v24, v8, 24
238 ; CHECK-NEXT: vor.vv v16, v16, v24
239 ; CHECK-NEXT: vand.vx v24, v8, a0
240 ; CHECK-NEXT: vsll.vi v24, v24, 8
241 ; CHECK-NEXT: vsll.vi v8, v8, 24
242 ; CHECK-NEXT: vor.vv v8, v8, v24
243 ; CHECK-NEXT: vor.vv v8, v8, v16
246 ; CHECK-ZVKB-LABEL: bswap_nxv16i32:
247 ; CHECK-ZVKB: # %bb.0:
248 ; CHECK-ZVKB-NEXT: vsetvli a0, zero, e32, m8, ta, ma
249 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
250 ; CHECK-ZVKB-NEXT: ret
251 %a = call <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32> %va)
252 ret <vscale x 16 x i32> %a
254 declare <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32>)
; Byte-swap of <vscale x 1 x i64> via the llvm.bswap.nxv1i64 intrinsic
; (e64, m1 per the vsetvli lines below).
; With only +v the expected expansion combines shifts by 56, 40, 24 and 8 in
; both directions with the masks 0xff00 (lui 16; addi/addiw -256),
; 0xff0000 (lui 4080) and 0xff000000.
; The two targets differ in how the 0xff000000 mask is supplied: riscv32
; stores it to the stack as two words (lui 1044480 low, zero high) and
; broadcasts it with a zero-stride vlse64.v for vand.vv, while riscv64
; builds it in a4 (li 255; slli 24) and uses vand.vx.
; With +zvkb the whole operation is a single vrev8.v on both targets.
256 define <vscale x 1 x i64> @bswap_nxv1i64(<vscale x 1 x i64> %va) {
257 ; RV32-LABEL: bswap_nxv1i64:
259 ; RV32-NEXT: addi sp, sp, -16
260 ; RV32-NEXT: .cfi_def_cfa_offset 16
261 ; RV32-NEXT: sw zero, 12(sp)
262 ; RV32-NEXT: lui a0, 1044480
263 ; RV32-NEXT: sw a0, 8(sp)
264 ; RV32-NEXT: li a0, 56
265 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
266 ; RV32-NEXT: vsrl.vx v9, v8, a0
267 ; RV32-NEXT: li a1, 40
268 ; RV32-NEXT: vsrl.vx v10, v8, a1
269 ; RV32-NEXT: lui a2, 16
270 ; RV32-NEXT: addi a2, a2, -256
271 ; RV32-NEXT: vand.vx v10, v10, a2
272 ; RV32-NEXT: vor.vv v9, v10, v9
273 ; RV32-NEXT: vsrl.vi v10, v8, 24
274 ; RV32-NEXT: addi a3, sp, 8
275 ; RV32-NEXT: vlse64.v v11, (a3), zero
276 ; RV32-NEXT: lui a3, 4080
277 ; RV32-NEXT: vand.vx v10, v10, a3
278 ; RV32-NEXT: vsrl.vi v12, v8, 8
279 ; RV32-NEXT: vand.vv v12, v12, v11
280 ; RV32-NEXT: vor.vv v10, v12, v10
281 ; RV32-NEXT: vor.vv v9, v10, v9
282 ; RV32-NEXT: vsll.vx v10, v8, a0
283 ; RV32-NEXT: vand.vx v12, v8, a2
284 ; RV32-NEXT: vsll.vx v12, v12, a1
285 ; RV32-NEXT: vor.vv v10, v10, v12
286 ; RV32-NEXT: vand.vx v12, v8, a3
287 ; RV32-NEXT: vsll.vi v12, v12, 24
288 ; RV32-NEXT: vand.vv v8, v8, v11
289 ; RV32-NEXT: vsll.vi v8, v8, 8
290 ; RV32-NEXT: vor.vv v8, v12, v8
291 ; RV32-NEXT: vor.vv v8, v10, v8
292 ; RV32-NEXT: vor.vv v8, v8, v9
293 ; RV32-NEXT: addi sp, sp, 16
296 ; RV64-LABEL: bswap_nxv1i64:
298 ; RV64-NEXT: li a0, 56
299 ; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
300 ; RV64-NEXT: vsrl.vx v9, v8, a0
301 ; RV64-NEXT: li a1, 40
302 ; RV64-NEXT: vsrl.vx v10, v8, a1
303 ; RV64-NEXT: lui a2, 16
304 ; RV64-NEXT: addiw a2, a2, -256
305 ; RV64-NEXT: vand.vx v10, v10, a2
306 ; RV64-NEXT: vor.vv v9, v10, v9
307 ; RV64-NEXT: vsrl.vi v10, v8, 24
308 ; RV64-NEXT: lui a3, 4080
309 ; RV64-NEXT: vand.vx v10, v10, a3
310 ; RV64-NEXT: vsrl.vi v11, v8, 8
311 ; RV64-NEXT: li a4, 255
312 ; RV64-NEXT: slli a4, a4, 24
313 ; RV64-NEXT: vand.vx v11, v11, a4
314 ; RV64-NEXT: vor.vv v10, v11, v10
315 ; RV64-NEXT: vor.vv v9, v10, v9
316 ; RV64-NEXT: vand.vx v10, v8, a3
317 ; RV64-NEXT: vsll.vi v10, v10, 24
318 ; RV64-NEXT: vand.vx v11, v8, a4
319 ; RV64-NEXT: vsll.vi v11, v11, 8
320 ; RV64-NEXT: vor.vv v10, v10, v11
321 ; RV64-NEXT: vsll.vx v11, v8, a0
322 ; RV64-NEXT: vand.vx v8, v8, a2
323 ; RV64-NEXT: vsll.vx v8, v8, a1
324 ; RV64-NEXT: vor.vv v8, v11, v8
325 ; RV64-NEXT: vor.vv v8, v8, v10
326 ; RV64-NEXT: vor.vv v8, v8, v9
329 ; CHECK-ZVKB-LABEL: bswap_nxv1i64:
330 ; CHECK-ZVKB: # %bb.0:
331 ; CHECK-ZVKB-NEXT: vsetvli a0, zero, e64, m1, ta, ma
332 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
333 ; CHECK-ZVKB-NEXT: ret
334 %a = call <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64> %va)
335 ret <vscale x 1 x i64> %a
337 declare <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64>)
; Byte-swap of <vscale x 2 x i64> via the llvm.bswap.nxv2i64 intrinsic
; (e64, m2 per the vsetvli lines below).
; With only +v the expected expansion combines shifts by 56, 40, 24 and 8 in
; both directions with the masks 0xff00 (lui 16; addi/addiw -256),
; 0xff0000 (lui 4080) and 0xff000000.
; The two targets differ in how the 0xff000000 mask is supplied: riscv32
; stores it to the stack as two words (lui 1044480 low, zero high) and
; broadcasts it with a zero-stride vlse64.v for vand.vv, while riscv64
; builds it in a4 (li 255; slli 24) and uses vand.vx.
; With +zvkb the whole operation is a single vrev8.v on both targets.
339 define <vscale x 2 x i64> @bswap_nxv2i64(<vscale x 2 x i64> %va) {
340 ; RV32-LABEL: bswap_nxv2i64:
342 ; RV32-NEXT: addi sp, sp, -16
343 ; RV32-NEXT: .cfi_def_cfa_offset 16
344 ; RV32-NEXT: sw zero, 12(sp)
345 ; RV32-NEXT: lui a0, 1044480
346 ; RV32-NEXT: sw a0, 8(sp)
347 ; RV32-NEXT: li a0, 56
348 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
349 ; RV32-NEXT: vsrl.vx v10, v8, a0
350 ; RV32-NEXT: li a1, 40
351 ; RV32-NEXT: vsrl.vx v12, v8, a1
352 ; RV32-NEXT: lui a2, 16
353 ; RV32-NEXT: addi a2, a2, -256
354 ; RV32-NEXT: vand.vx v12, v12, a2
355 ; RV32-NEXT: vor.vv v10, v12, v10
356 ; RV32-NEXT: vsrl.vi v12, v8, 24
357 ; RV32-NEXT: addi a3, sp, 8
358 ; RV32-NEXT: vlse64.v v14, (a3), zero
359 ; RV32-NEXT: lui a3, 4080
360 ; RV32-NEXT: vand.vx v12, v12, a3
361 ; RV32-NEXT: vsrl.vi v16, v8, 8
362 ; RV32-NEXT: vand.vv v16, v16, v14
363 ; RV32-NEXT: vor.vv v12, v16, v12
364 ; RV32-NEXT: vor.vv v10, v12, v10
365 ; RV32-NEXT: vsll.vx v12, v8, a0
366 ; RV32-NEXT: vand.vx v16, v8, a2
367 ; RV32-NEXT: vsll.vx v16, v16, a1
368 ; RV32-NEXT: vor.vv v12, v12, v16
369 ; RV32-NEXT: vand.vx v16, v8, a3
370 ; RV32-NEXT: vsll.vi v16, v16, 24
371 ; RV32-NEXT: vand.vv v8, v8, v14
372 ; RV32-NEXT: vsll.vi v8, v8, 8
373 ; RV32-NEXT: vor.vv v8, v16, v8
374 ; RV32-NEXT: vor.vv v8, v12, v8
375 ; RV32-NEXT: vor.vv v8, v8, v10
376 ; RV32-NEXT: addi sp, sp, 16
379 ; RV64-LABEL: bswap_nxv2i64:
381 ; RV64-NEXT: li a0, 56
382 ; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma
383 ; RV64-NEXT: vsrl.vx v10, v8, a0
384 ; RV64-NEXT: li a1, 40
385 ; RV64-NEXT: vsrl.vx v12, v8, a1
386 ; RV64-NEXT: lui a2, 16
387 ; RV64-NEXT: addiw a2, a2, -256
388 ; RV64-NEXT: vand.vx v12, v12, a2
389 ; RV64-NEXT: vor.vv v10, v12, v10
390 ; RV64-NEXT: vsrl.vi v12, v8, 24
391 ; RV64-NEXT: lui a3, 4080
392 ; RV64-NEXT: vand.vx v12, v12, a3
393 ; RV64-NEXT: vsrl.vi v14, v8, 8
394 ; RV64-NEXT: li a4, 255
395 ; RV64-NEXT: slli a4, a4, 24
396 ; RV64-NEXT: vand.vx v14, v14, a4
397 ; RV64-NEXT: vor.vv v12, v14, v12
398 ; RV64-NEXT: vor.vv v10, v12, v10
399 ; RV64-NEXT: vand.vx v12, v8, a3
400 ; RV64-NEXT: vsll.vi v12, v12, 24
401 ; RV64-NEXT: vand.vx v14, v8, a4
402 ; RV64-NEXT: vsll.vi v14, v14, 8
403 ; RV64-NEXT: vor.vv v12, v12, v14
404 ; RV64-NEXT: vsll.vx v14, v8, a0
405 ; RV64-NEXT: vand.vx v8, v8, a2
406 ; RV64-NEXT: vsll.vx v8, v8, a1
407 ; RV64-NEXT: vor.vv v8, v14, v8
408 ; RV64-NEXT: vor.vv v8, v8, v12
409 ; RV64-NEXT: vor.vv v8, v8, v10
412 ; CHECK-ZVKB-LABEL: bswap_nxv2i64:
413 ; CHECK-ZVKB: # %bb.0:
414 ; CHECK-ZVKB-NEXT: vsetvli a0, zero, e64, m2, ta, ma
415 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
416 ; CHECK-ZVKB-NEXT: ret
417 %a = call <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64> %va)
418 ret <vscale x 2 x i64> %a
420 declare <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64>)
; Byte-swap of <vscale x 4 x i64> via the llvm.bswap.nxv4i64 intrinsic
; (e64, m4 per the vsetvli lines below).
; With only +v the expected expansion combines shifts by 56, 40, 24 and 8 in
; both directions with the masks 0xff00 (lui 16; addi/addiw -256),
; 0xff0000 (lui 4080) and 0xff000000.
; The two targets differ in how the 0xff000000 mask is supplied: riscv32
; stores it to the stack as two words (lui 1044480 low, zero high) and
; broadcasts it with a zero-stride vlse64.v for vand.vv, while riscv64
; builds it in a4 (li 255; slli 24) and uses vand.vx.
; With +zvkb the whole operation is a single vrev8.v on both targets.
422 define <vscale x 4 x i64> @bswap_nxv4i64(<vscale x 4 x i64> %va) {
423 ; RV32-LABEL: bswap_nxv4i64:
425 ; RV32-NEXT: addi sp, sp, -16
426 ; RV32-NEXT: .cfi_def_cfa_offset 16
427 ; RV32-NEXT: sw zero, 12(sp)
428 ; RV32-NEXT: lui a0, 1044480
429 ; RV32-NEXT: sw a0, 8(sp)
430 ; RV32-NEXT: li a0, 56
431 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
432 ; RV32-NEXT: vsrl.vx v12, v8, a0
433 ; RV32-NEXT: li a1, 40
434 ; RV32-NEXT: vsrl.vx v16, v8, a1
435 ; RV32-NEXT: lui a2, 16
436 ; RV32-NEXT: addi a2, a2, -256
437 ; RV32-NEXT: vand.vx v16, v16, a2
438 ; RV32-NEXT: vor.vv v12, v16, v12
439 ; RV32-NEXT: vsrl.vi v16, v8, 24
440 ; RV32-NEXT: addi a3, sp, 8
441 ; RV32-NEXT: vlse64.v v20, (a3), zero
442 ; RV32-NEXT: lui a3, 4080
443 ; RV32-NEXT: vand.vx v16, v16, a3
444 ; RV32-NEXT: vsrl.vi v24, v8, 8
445 ; RV32-NEXT: vand.vv v24, v24, v20
446 ; RV32-NEXT: vor.vv v16, v24, v16
447 ; RV32-NEXT: vor.vv v12, v16, v12
448 ; RV32-NEXT: vsll.vx v16, v8, a0
449 ; RV32-NEXT: vand.vx v24, v8, a2
450 ; RV32-NEXT: vsll.vx v24, v24, a1
451 ; RV32-NEXT: vor.vv v16, v16, v24
452 ; RV32-NEXT: vand.vx v24, v8, a3
453 ; RV32-NEXT: vsll.vi v24, v24, 24
454 ; RV32-NEXT: vand.vv v8, v8, v20
455 ; RV32-NEXT: vsll.vi v8, v8, 8
456 ; RV32-NEXT: vor.vv v8, v24, v8
457 ; RV32-NEXT: vor.vv v8, v16, v8
458 ; RV32-NEXT: vor.vv v8, v8, v12
459 ; RV32-NEXT: addi sp, sp, 16
462 ; RV64-LABEL: bswap_nxv4i64:
464 ; RV64-NEXT: li a0, 56
465 ; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, ma
466 ; RV64-NEXT: vsrl.vx v12, v8, a0
467 ; RV64-NEXT: li a1, 40
468 ; RV64-NEXT: vsrl.vx v16, v8, a1
469 ; RV64-NEXT: lui a2, 16
470 ; RV64-NEXT: addiw a2, a2, -256
471 ; RV64-NEXT: vand.vx v16, v16, a2
472 ; RV64-NEXT: vor.vv v12, v16, v12
473 ; RV64-NEXT: vsrl.vi v16, v8, 24
474 ; RV64-NEXT: lui a3, 4080
475 ; RV64-NEXT: vand.vx v16, v16, a3
476 ; RV64-NEXT: vsrl.vi v20, v8, 8
477 ; RV64-NEXT: li a4, 255
478 ; RV64-NEXT: slli a4, a4, 24
479 ; RV64-NEXT: vand.vx v20, v20, a4
480 ; RV64-NEXT: vor.vv v16, v20, v16
481 ; RV64-NEXT: vor.vv v12, v16, v12
482 ; RV64-NEXT: vand.vx v16, v8, a3
483 ; RV64-NEXT: vsll.vi v16, v16, 24
484 ; RV64-NEXT: vand.vx v20, v8, a4
485 ; RV64-NEXT: vsll.vi v20, v20, 8
486 ; RV64-NEXT: vor.vv v16, v16, v20
487 ; RV64-NEXT: vsll.vx v20, v8, a0
488 ; RV64-NEXT: vand.vx v8, v8, a2
489 ; RV64-NEXT: vsll.vx v8, v8, a1
490 ; RV64-NEXT: vor.vv v8, v20, v8
491 ; RV64-NEXT: vor.vv v8, v8, v16
492 ; RV64-NEXT: vor.vv v8, v8, v12
495 ; CHECK-ZVKB-LABEL: bswap_nxv4i64:
496 ; CHECK-ZVKB: # %bb.0:
497 ; CHECK-ZVKB-NEXT: vsetvli a0, zero, e64, m4, ta, ma
498 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
499 ; CHECK-ZVKB-NEXT: ret
500 %a = call <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64> %va)
501 ret <vscale x 4 x i64> %a
503 declare <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64>)
; Byte-swap of <vscale x 8 x i64> via the llvm.bswap.nxv8i64 intrinsic
; (e64, m8 per the vsetvli lines below).
; With only +v the expected expansion combines shifts by 56, 40, 24 and 8 in
; both directions with the masks 0xff00 (lui 16; addi/addiw -256),
; 0xff0000 (lui 4080) and 0xff000000.
; On riscv32 the 0xff000000 mask is stored to the stack as two words and
; broadcast with a zero-stride vlse64.v. The expected code also reserves
; 8 * vlenb bytes of stack (csrr vlenb; slli 3; sub sp) and spills and
; reloads an m8 register group through it (vs8r.v / vl8r.v).
; On riscv64 the mask is built in a4 (li 255; slli 24), and no stack
; adjustment or spill appears in the expected code.
; With +zvkb the whole operation is a single vrev8.v on both targets.
505 define <vscale x 8 x i64> @bswap_nxv8i64(<vscale x 8 x i64> %va) {
506 ; RV32-LABEL: bswap_nxv8i64:
508 ; RV32-NEXT: addi sp, sp, -16
509 ; RV32-NEXT: .cfi_def_cfa_offset 16
510 ; RV32-NEXT: csrr a0, vlenb
511 ; RV32-NEXT: slli a0, a0, 3
512 ; RV32-NEXT: sub sp, sp, a0
513 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
514 ; RV32-NEXT: sw zero, 12(sp)
515 ; RV32-NEXT: lui a0, 1044480
516 ; RV32-NEXT: sw a0, 8(sp)
517 ; RV32-NEXT: li a0, 56
518 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
519 ; RV32-NEXT: vsrl.vx v16, v8, a0
520 ; RV32-NEXT: li a1, 40
521 ; RV32-NEXT: vsrl.vx v24, v8, a1
522 ; RV32-NEXT: lui a2, 16
523 ; RV32-NEXT: addi a2, a2, -256
524 ; RV32-NEXT: vand.vx v24, v24, a2
525 ; RV32-NEXT: vor.vv v16, v24, v16
526 ; RV32-NEXT: addi a3, sp, 16
527 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
528 ; RV32-NEXT: vsrl.vi v0, v8, 24
529 ; RV32-NEXT: addi a3, sp, 8
530 ; RV32-NEXT: vlse64.v v16, (a3), zero
531 ; RV32-NEXT: lui a3, 4080
532 ; RV32-NEXT: vand.vx v0, v0, a3
533 ; RV32-NEXT: vsrl.vi v24, v8, 8
534 ; RV32-NEXT: vand.vv v24, v24, v16
535 ; RV32-NEXT: vor.vv v24, v24, v0
536 ; RV32-NEXT: addi a4, sp, 16
537 ; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload
538 ; RV32-NEXT: vor.vv v24, v24, v0
539 ; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
540 ; RV32-NEXT: vand.vx v0, v8, a2
541 ; RV32-NEXT: vsll.vx v0, v0, a1
542 ; RV32-NEXT: vsll.vx v24, v8, a0
543 ; RV32-NEXT: vor.vv v24, v24, v0
544 ; RV32-NEXT: vand.vv v16, v8, v16
545 ; RV32-NEXT: vand.vx v8, v8, a3
546 ; RV32-NEXT: vsll.vi v8, v8, 24
547 ; RV32-NEXT: vsll.vi v16, v16, 8
548 ; RV32-NEXT: vor.vv v8, v8, v16
549 ; RV32-NEXT: vor.vv v8, v24, v8
550 ; RV32-NEXT: addi a0, sp, 16
551 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
552 ; RV32-NEXT: vor.vv v8, v8, v16
553 ; RV32-NEXT: csrr a0, vlenb
554 ; RV32-NEXT: slli a0, a0, 3
555 ; RV32-NEXT: add sp, sp, a0
556 ; RV32-NEXT: addi sp, sp, 16
559 ; RV64-LABEL: bswap_nxv8i64:
561 ; RV64-NEXT: li a0, 56
562 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
563 ; RV64-NEXT: vsrl.vx v16, v8, a0
564 ; RV64-NEXT: li a1, 40
565 ; RV64-NEXT: vsrl.vx v24, v8, a1
566 ; RV64-NEXT: lui a2, 16
567 ; RV64-NEXT: addiw a2, a2, -256
568 ; RV64-NEXT: vand.vx v24, v24, a2
569 ; RV64-NEXT: vor.vv v16, v24, v16
570 ; RV64-NEXT: vsrl.vi v24, v8, 24
571 ; RV64-NEXT: lui a3, 4080
572 ; RV64-NEXT: vand.vx v24, v24, a3
573 ; RV64-NEXT: vsrl.vi v0, v8, 8
574 ; RV64-NEXT: li a4, 255
575 ; RV64-NEXT: slli a4, a4, 24
576 ; RV64-NEXT: vand.vx v0, v0, a4
577 ; RV64-NEXT: vor.vv v24, v0, v24
578 ; RV64-NEXT: vor.vv v16, v24, v16
579 ; RV64-NEXT: vand.vx v24, v8, a3
580 ; RV64-NEXT: vsll.vi v24, v24, 24
581 ; RV64-NEXT: vand.vx v0, v8, a4
582 ; RV64-NEXT: vsll.vi v0, v0, 8
583 ; RV64-NEXT: vor.vv v24, v24, v0
584 ; RV64-NEXT: vsll.vx v0, v8, a0
585 ; RV64-NEXT: vand.vx v8, v8, a2
586 ; RV64-NEXT: vsll.vx v8, v8, a1
587 ; RV64-NEXT: vor.vv v8, v0, v8
588 ; RV64-NEXT: vor.vv v8, v8, v24
589 ; RV64-NEXT: vor.vv v8, v8, v16
592 ; CHECK-ZVKB-LABEL: bswap_nxv8i64:
593 ; CHECK-ZVKB: # %bb.0:
594 ; CHECK-ZVKB-NEXT: vsetvli a0, zero, e64, m8, ta, ma
595 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
596 ; CHECK-ZVKB-NEXT: ret
597 %a = call <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64> %va)
598 ret <vscale x 8 x i64> %a
600 declare <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64>)