; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
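
; Test VP byte-swap lowering for fixed-length vectors. i16 elements lower to
; a shift/shift/or pair, i32 elements add byte masks, and i64 elements also
; need shift-by-register (56/40) steps; on RV32 a 64-bit byte mask is
; materialized on the stack and splatted with a zero-stride vlse64.v.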

declare <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16>, <2 x i1>, i32)

define <2 x i16> @vp_bswap_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i16> %v
}

define <2 x i16> @vp_bswap_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v2i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i16> %v
}

declare <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16>, <4 x i1>, i32)

define <4 x i16> @vp_bswap_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i16> %v
}

define <4 x i16> @vp_bswap_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v4i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i16> %v
}

declare <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16>, <8 x i1>, i32)

define <8 x i16> @vp_bswap_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

define <8 x i16> @vp_bswap_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %v
}

declare <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16>, <16 x i1>, i32)

define <16 x i16> @vp_bswap_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i16> %v
}

define <16 x i16> @vp_bswap_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i16> %v
}

declare <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32>, <2 x i1>, i32)

define <2 x i32> @vp_bswap_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v9, v9, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v10, v10, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i32> %v
}

define <2 x i32> @vp_bswap_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v2i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsll.vi v10, v10, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i32> %v
}

declare <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32>, <4 x i1>, i32)

define <4 x i32> @vp_bswap_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v9, v9, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v10, v10, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %v
}

define <4 x i32> @vp_bswap_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v4i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsll.vi v10, v10, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i32> %v
}

declare <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32>, <8 x i1>, i32)

define <8 x i32> @vp_bswap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v10, v10, v12, v0.t
; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v12, v12, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vp_bswap_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v8i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vsrl.vi v12, v8, 24
; CHECK-NEXT:    vor.vv v10, v10, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsll.vi v12, v12, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i32> %v
}

declare <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32>, <16 x i1>, i32)

define <16 x i32> @vp_bswap_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vsrl.vi v16, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v12, v12, v16, v0.t
; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v16, v16, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i32> %v
}

define <16 x i32> @vp_bswap_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v16i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vsrl.vi v16, v8, 24
; CHECK-NEXT:    vor.vv v12, v12, v16
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsll.vi v16, v16, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i32> %v
}

declare <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64>, <2 x i1>, i32)

define <2 x i64> @vp_bswap_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bswap_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a1, 1044480
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsll.vx v9, v8, a1, v0.t
; RV32-NEXT:    lui a2, 16
; RV32-NEXT:    addi a2, a2, -256
; RV32-NEXT:    vand.vx v10, v8, a2, v0.t
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsll.vx v10, v10, a3, v0.t
; RV32-NEXT:    vor.vv v9, v9, v10, v0.t
; RV32-NEXT:    addi a4, sp, 8
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v10, (a4), zero
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vx v11, v8, a4, v0.t
; RV32-NEXT:    vsll.vi v11, v11, 24, v0.t
; RV32-NEXT:    vand.vv v12, v8, v10, v0.t
; RV32-NEXT:    vsll.vi v12, v12, 8, v0.t
; RV32-NEXT:    vor.vv v11, v11, v12, v0.t
; RV32-NEXT:    vor.vv v9, v9, v11, v0.t
; RV32-NEXT:    vsrl.vx v11, v8, a1, v0.t
; RV32-NEXT:    vsrl.vx v12, v8, a3, v0.t
; RV32-NEXT:    vand.vx v12, v12, a2, v0.t
; RV32-NEXT:    vor.vv v11, v12, v11, v0.t
; RV32-NEXT:    vsrl.vi v12, v8, 24, v0.t
; RV32-NEXT:    vand.vx v12, v12, a4, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
; RV32-NEXT:    vor.vv v8, v8, v11, v0.t
; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_bswap_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vand.vx v9, v8, a1, v0.t
; RV64-NEXT:    vsll.vi v9, v9, 24, v0.t
; RV64-NEXT:    li a0, 255
; RV64-NEXT:    slli a0, a0, 24
; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
; RV64-NEXT:    vsll.vi v10, v10, 8, v0.t
; RV64-NEXT:    vor.vv v9, v9, v10, v0.t
; RV64-NEXT:    li a2, 56
; RV64-NEXT:    vsll.vx v10, v8, a2, v0.t
; RV64-NEXT:    lui a3, 16
; RV64-NEXT:    addiw a3, a3, -256
; RV64-NEXT:    vand.vx v11, v8, a3, v0.t
; RV64-NEXT:    li a4, 40
; RV64-NEXT:    vsll.vx v11, v11, a4, v0.t
; RV64-NEXT:    vor.vv v10, v10, v11, v0.t
; RV64-NEXT:    vor.vv v9, v10, v9, v0.t
; RV64-NEXT:    vsrl.vx v10, v8, a2, v0.t
; RV64-NEXT:    vsrl.vx v11, v8, a4, v0.t
; RV64-NEXT:    vand.vx v11, v11, a3, v0.t
; RV64-NEXT:    vor.vv v10, v11, v10, v0.t
; RV64-NEXT:    vsrl.vi v11, v8, 24, v0.t
; RV64-NEXT:    vand.vx v11, v11, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
; RV64-NEXT:    vor.vv v8, v8, v11, v0.t
; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %v
}

define <2 x i64> @vp_bswap_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bswap_v2i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a1, 1044480
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsll.vx v9, v8, a1
; RV32-NEXT:    lui a2, 16
; RV32-NEXT:    addi a2, a2, -256
; RV32-NEXT:    vand.vx v10, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsll.vx v10, v10, a3
; RV32-NEXT:    vor.vv v9, v9, v10
; RV32-NEXT:    addi a4, sp, 8
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v10, (a4), zero
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vx v11, v8, a4
; RV32-NEXT:    vsll.vi v11, v11, 24
; RV32-NEXT:    vand.vv v12, v8, v10
; RV32-NEXT:    vsll.vi v12, v12, 8
; RV32-NEXT:    vor.vv v11, v11, v12
; RV32-NEXT:    vor.vv v9, v9, v11
; RV32-NEXT:    vsrl.vx v11, v8, a1
; RV32-NEXT:    vsrl.vx v12, v8, a3
; RV32-NEXT:    vand.vx v12, v12, a2
; RV32-NEXT:    vor.vv v11, v12, v11
; RV32-NEXT:    vsrl.vi v12, v8, 24
; RV32-NEXT:    vand.vx v12, v12, a4
; RV32-NEXT:    vsrl.vi v8, v8, 8
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vor.vv v8, v8, v11
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_bswap_v2i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vand.vx v9, v8, a1
; RV64-NEXT:    vsll.vi v9, v9, 24
; RV64-NEXT:    li a0, 255
; RV64-NEXT:    slli a0, a0, 24
; RV64-NEXT:    vand.vx v10, v8, a0
; RV64-NEXT:    vsll.vi v10, v10, 8
; RV64-NEXT:    vor.vv v9, v9, v10
; RV64-NEXT:    li a2, 56
; RV64-NEXT:    vsll.vx v10, v8, a2
; RV64-NEXT:    lui a3, 16
; RV64-NEXT:    addiw a3, a3, -256
; RV64-NEXT:    vand.vx v11, v8, a3
; RV64-NEXT:    li a4, 40
; RV64-NEXT:    vsll.vx v11, v11, a4
; RV64-NEXT:    vor.vv v10, v10, v11
; RV64-NEXT:    vor.vv v9, v10, v9
; RV64-NEXT:    vsrl.vx v10, v8, a2
; RV64-NEXT:    vsrl.vx v11, v8, a4
; RV64-NEXT:    vand.vx v11, v11, a3
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vsrl.vi v11, v8, 24
; RV64-NEXT:    vand.vx v11, v11, a1
; RV64-NEXT:    vsrl.vi v8, v8, 8
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v8, v11
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i64> %v
}

declare <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64>, <4 x i1>, i32)

define <4 x i64> @vp_bswap_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bswap_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a1, 1044480
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsll.vx v10, v8, a1, v0.t
; RV32-NEXT:    lui a2, 16
; RV32-NEXT:    addi a2, a2, -256
; RV32-NEXT:    vand.vx v12, v8, a2, v0.t
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsll.vx v12, v12, a3, v0.t
; RV32-NEXT:    vor.vv v10, v10, v12, v0.t
; RV32-NEXT:    addi a4, sp, 8
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v12, (a4), zero
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vx v14, v8, a4, v0.t
; RV32-NEXT:    vsll.vi v14, v14, 24, v0.t
; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
; RV32-NEXT:    vsll.vi v16, v16, 8, v0.t
; RV32-NEXT:    vor.vv v14, v14, v16, v0.t
; RV32-NEXT:    vor.vv v10, v10, v14, v0.t
; RV32-NEXT:    vsrl.vx v14, v8, a1, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a3, v0.t
; RV32-NEXT:    vand.vx v16, v16, a2, v0.t
; RV32-NEXT:    vor.vv v14, v16, v14, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 24, v0.t
; RV32-NEXT:    vand.vx v16, v16, a4, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v14, v0.t
; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_bswap_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vand.vx v10, v8, a1, v0.t
; RV64-NEXT:    vsll.vi v10, v10, 24, v0.t
; RV64-NEXT:    li a0, 255
; RV64-NEXT:    slli a0, a0, 24
; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
; RV64-NEXT:    vsll.vi v12, v12, 8, v0.t
; RV64-NEXT:    vor.vv v10, v10, v12, v0.t
; RV64-NEXT:    li a2, 56
; RV64-NEXT:    vsll.vx v12, v8, a2, v0.t
; RV64-NEXT:    lui a3, 16
; RV64-NEXT:    addiw a3, a3, -256
; RV64-NEXT:    vand.vx v14, v8, a3, v0.t
; RV64-NEXT:    li a4, 40
; RV64-NEXT:    vsll.vx v14, v14, a4, v0.t
; RV64-NEXT:    vor.vv v12, v12, v14, v0.t
; RV64-NEXT:    vor.vv v10, v12, v10, v0.t
; RV64-NEXT:    vsrl.vx v12, v8, a2, v0.t
; RV64-NEXT:    vsrl.vx v14, v8, a4, v0.t
; RV64-NEXT:    vand.vx v14, v14, a3, v0.t
; RV64-NEXT:    vor.vv v12, v14, v12, v0.t
; RV64-NEXT:    vsrl.vi v14, v8, 24, v0.t
; RV64-NEXT:    vand.vx v14, v14, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
; RV64-NEXT:    vor.vv v8, v8, v14, v0.t
; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i64> %v
}

define <4 x i64> @vp_bswap_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bswap_v4i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a1, 1044480
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsll.vx v10, v8, a1
; RV32-NEXT:    lui a2, 16
; RV32-NEXT:    addi a2, a2, -256
; RV32-NEXT:    vand.vx v12, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsll.vx v12, v12, a3
; RV32-NEXT:    vor.vv v10, v10, v12
; RV32-NEXT:    addi a4, sp, 8
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v12, (a4), zero
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vx v14, v8, a4
; RV32-NEXT:    vsll.vi v14, v14, 24
; RV32-NEXT:    vand.vv v16, v8, v12
; RV32-NEXT:    vsll.vi v16, v16, 8
; RV32-NEXT:    vor.vv v14, v14, v16
; RV32-NEXT:    vor.vv v10, v10, v14
; RV32-NEXT:    vsrl.vx v14, v8, a1
; RV32-NEXT:    vsrl.vx v16, v8, a3
; RV32-NEXT:    vand.vx v16, v16, a2
; RV32-NEXT:    vor.vv v14, v16, v14
; RV32-NEXT:    vsrl.vi v16, v8, 24
; RV32-NEXT:    vand.vx v16, v16, a4
; RV32-NEXT:    vsrl.vi v8, v8, 8
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vor.vv v8, v8, v14
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_bswap_v4i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vand.vx v10, v8, a1
; RV64-NEXT:    vsll.vi v10, v10, 24
; RV64-NEXT:    li a0, 255
; RV64-NEXT:    slli a0, a0, 24
; RV64-NEXT:    vand.vx v12, v8, a0
; RV64-NEXT:    vsll.vi v12, v12, 8
; RV64-NEXT:    vor.vv v10, v10, v12
; RV64-NEXT:    li a2, 56
; RV64-NEXT:    vsll.vx v12, v8, a2
; RV64-NEXT:    lui a3, 16
; RV64-NEXT:    addiw a3, a3, -256
; RV64-NEXT:    vand.vx v14, v8, a3
; RV64-NEXT:    li a4, 40
; RV64-NEXT:    vsll.vx v14, v14, a4
; RV64-NEXT:    vor.vv v12, v12, v14
; RV64-NEXT:    vor.vv v10, v12, v10
; RV64-NEXT:    vsrl.vx v12, v8, a2
; RV64-NEXT:    vsrl.vx v14, v8, a4
; RV64-NEXT:    vand.vx v14, v14, a3
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vsrl.vi v14, v8, 24
; RV64-NEXT:    vand.vx v14, v14, a1
; RV64-NEXT:    vsrl.vi v8, v8, 8
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v8, v14
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %v
}

declare <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64>, <8 x i1>, i32)

define <8 x i64> @vp_bswap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bswap_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a1, 1044480
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsll.vx v12, v8, a1, v0.t
; RV32-NEXT:    lui a2, 16
; RV32-NEXT:    addi a2, a2, -256
; RV32-NEXT:    vand.vx v16, v8, a2, v0.t
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsll.vx v16, v16, a3, v0.t
; RV32-NEXT:    vor.vv v16, v12, v16, v0.t
; RV32-NEXT:    addi a4, sp, 8
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v12, (a4), zero
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vx v20, v8, a4, v0.t
; RV32-NEXT:    vsll.vi v20, v20, 24, v0.t
; RV32-NEXT:    vand.vv v24, v8, v12, v0.t
; RV32-NEXT:    vsll.vi v24, v24, 8, v0.t
; RV32-NEXT:    vor.vv v20, v20, v24, v0.t
; RV32-NEXT:    vor.vv v16, v16, v20, v0.t
; RV32-NEXT:    vsrl.vx v20, v8, a1, v0.t
; RV32-NEXT:    vsrl.vx v24, v8, a3, v0.t
; RV32-NEXT:    vand.vx v24, v24, a2, v0.t
; RV32-NEXT:    vor.vv v20, v24, v20, v0.t
; RV32-NEXT:    vsrl.vi v24, v8, 24, v0.t
; RV32-NEXT:    vand.vx v24, v24, a4, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vor.vv v8, v8, v24, v0.t
; RV32-NEXT:    vor.vv v8, v8, v20, v0.t
; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_bswap_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vand.vx v12, v8, a1, v0.t
; RV64-NEXT:    vsll.vi v12, v12, 24, v0.t
; RV64-NEXT:    li a0, 255
; RV64-NEXT:    slli a0, a0, 24
; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
; RV64-NEXT:    vsll.vi v16, v16, 8, v0.t
; RV64-NEXT:    vor.vv v12, v12, v16, v0.t
; RV64-NEXT:    li a2, 56
; RV64-NEXT:    vsll.vx v16, v8, a2, v0.t
; RV64-NEXT:    lui a3, 16
; RV64-NEXT:    addiw a3, a3, -256
; RV64-NEXT:    vand.vx v20, v8, a3, v0.t
; RV64-NEXT:    li a4, 40
; RV64-NEXT:    vsll.vx v20, v20, a4, v0.t
; RV64-NEXT:    vor.vv v16, v16, v20, v0.t
; RV64-NEXT:    vor.vv v12, v16, v12, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a2, v0.t
; RV64-NEXT:    vsrl.vx v20, v8, a4, v0.t
; RV64-NEXT:    vand.vx v20, v20, a3, v0.t
; RV64-NEXT:    vor.vv v16, v20, v16, v0.t
; RV64-NEXT:    vsrl.vi v20, v8, 24, v0.t
; RV64-NEXT:    vand.vx v20, v20, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
; RV64-NEXT:    vor.vv v8, v8, v20, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vp_bswap_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bswap_v8i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a1, 1044480
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsll.vx v12, v8, a1
; RV32-NEXT:    lui a2, 16
; RV32-NEXT:    addi a2, a2, -256
; RV32-NEXT:    vand.vx v16, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsll.vx v16, v16, a3
; RV32-NEXT:    vor.vv v12, v12, v16
; RV32-NEXT:    addi a4, sp, 8
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v16, (a4), zero
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vx v20, v8, a4
; RV32-NEXT:    vsll.vi v20, v20, 24
; RV32-NEXT:    vand.vv v24, v8, v16
; RV32-NEXT:    vsll.vi v24, v24, 8
; RV32-NEXT:    vor.vv v20, v20, v24
; RV32-NEXT:    vor.vv v12, v12, v20
; RV32-NEXT:    vsrl.vx v20, v8, a1
; RV32-NEXT:    vsrl.vx v24, v8, a3
; RV32-NEXT:    vand.vx v24, v24, a2
; RV32-NEXT:    vor.vv v20, v24, v20
; RV32-NEXT:    vsrl.vi v24, v8, 24
; RV32-NEXT:    vand.vx v24, v24, a4
; RV32-NEXT:    vsrl.vi v8, v8, 8
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vor.vv v8, v8, v20
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_bswap_v8i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vand.vx v12, v8, a1
; RV64-NEXT:    vsll.vi v12, v12, 24
; RV64-NEXT:    li a0, 255
; RV64-NEXT:    slli a0, a0, 24
; RV64-NEXT:    vand.vx v16, v8, a0
; RV64-NEXT:    vsll.vi v16, v16, 8
; RV64-NEXT:    vor.vv v12, v12, v16
; RV64-NEXT:    li a2, 56
; RV64-NEXT:    vsll.vx v16, v8, a2
; RV64-NEXT:    lui a3, 16
; RV64-NEXT:    addiw a3, a3, -256
; RV64-NEXT:    vand.vx v20, v8, a3
; RV64-NEXT:    li a4, 40
; RV64-NEXT:    vsll.vx v20, v20, a4
; RV64-NEXT:    vor.vv v16, v16, v20
; RV64-NEXT:    vor.vv v12, v16, v12
; RV64-NEXT:    vsrl.vx v16, v8, a2
; RV64-NEXT:    vsrl.vx v20, v8, a4
; RV64-NEXT:    vand.vx v20, v20, a3
; RV64-NEXT:    vor.vv v16, v20, v16
; RV64-NEXT:    vsrl.vi v20, v8, 24
; RV64-NEXT:    vand.vx v20, v20, a1
; RV64-NEXT:    vsrl.vi v8, v8, 8
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v8, v20
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i64> %v
}
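
; The following 15- and 16-element i64 cases run at LMUL=8, so the masked
; versions spill and reload whole vector register groups (vs8r.v/vl8r.v) on
; the stack to keep intermediate results alive.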

declare <15 x i64> @llvm.vp.bswap.v15i64(<15 x i64>, <15 x i1>, i32)

define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bswap_v15i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 24
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a1, 1044480
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsll.vx v16, v8, a1, v0.t
; RV32-NEXT:    lui a2, 16
; RV32-NEXT:    addi a2, a2, -256
; RV32-NEXT:    vand.vx v24, v8, a2, v0.t
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsll.vx v24, v24, a3, v0.t
; RV32-NEXT:    vor.vv v16, v16, v24, v0.t
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    slli a4, a4, 4
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 16
; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    addi a4, sp, 8
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a4), zero
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    slli a4, a4, 3
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 16
; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vx v24, v8, a4, v0.t
; RV32-NEXT:    vsll.vi v24, v24, 24, v0.t
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
; RV32-NEXT:    vsll.vi v16, v24, 8, v0.t
; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v16, v24, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v16, v24, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT:    vsrl.vx v24, v8, a3, v0.t
; RV32-NEXT:    vand.vx v24, v24, a2, v0.t
; RV32-NEXT:    vor.vv v16, v24, v16, v0.t
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vi v24, v8, 24, v0.t
; RV32-NEXT:    vand.vx v24, v24, a4, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v24, v0.t
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 24
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_bswap_v15i64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    sub sp, sp, a1
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
; RV64-NEXT:    vsll.vi v16, v16, 24, v0.t
; RV64-NEXT:    li a0, 255
; RV64-NEXT:    slli a0, a0, 24
; RV64-NEXT:    vand.vx v24, v8, a0, v0.t
; RV64-NEXT:    vsll.vi v24, v24, 8, v0.t
; RV64-NEXT:    vor.vv v16, v16, v24, v0.t
; RV64-NEXT:    addi a2, sp, 16
; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV64-NEXT:    li a2, 56
; RV64-NEXT:    vsll.vx v24, v8, a2, v0.t
; RV64-NEXT:    lui a3, 16
; RV64-NEXT:    addiw a3, a3, -256
; RV64-NEXT:    li a4, 40
; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
; RV64-NEXT:    vsll.vx v16, v16, a4, v0.t
; RV64-NEXT:    vor.vv v16, v24, v16, v0.t
; RV64-NEXT:    addi a5, sp, 16
; RV64-NEXT:    vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV64-NEXT:    vor.vv v16, v16, v24, v0.t
; RV64-NEXT:    vs8r.v v16, (a5) # Unknown-size Folded Spill
; RV64-NEXT:    vsrl.vx v24, v8, a2, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a4, v0.t
; RV64-NEXT:    vand.vx v16, v16, a3, v0.t
; RV64-NEXT:    vor.vv v24, v16, v24, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 24, v0.t
; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v24, v0.t
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %v = call <15 x i64> @llvm.vp.bswap.v15i64(<15 x i64> %va, <15 x i1> %m, i32 %evl)
  ret <15 x i64> %v
}

define <15 x i64> @vp_bswap_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bswap_v15i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a1, 1044480
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsll.vx v16, v8, a1
; RV32-NEXT:    lui a2, 16
; RV32-NEXT:    addi a2, a2, -256
; RV32-NEXT:    vand.vx v24, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsll.vx v24, v24, a3
; RV32-NEXT:    vor.vv v16, v16, v24
; RV32-NEXT:    addi a4, sp, 16
; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    addi a4, sp, 8
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a4), zero
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vx v0, v8, a4
; RV32-NEXT:    vsll.vi v0, v0, 24
; RV32-NEXT:    vand.vv v24, v8, v16
; RV32-NEXT:    vsll.vi v24, v24, 8
; RV32-NEXT:    vor.vv v24, v0, v24
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v24, v0, v24
; RV32-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vx v0, v8, a3
; RV32-NEXT:    vand.vx v0, v0, a2
; RV32-NEXT:    vsrl.vx v24, v8, a1
; RV32-NEXT:    vor.vv v24, v0, v24
; RV32-NEXT:    vsrl.vi v0, v8, 8
; RV32-NEXT:    vand.vv v16, v0, v16
; RV32-NEXT:    vsrl.vi v8, v8, 24
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_bswap_v15i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsll.vi v16, v16, 24
; RV64-NEXT:    li a0, 255
; RV64-NEXT:    slli a0, a0, 24
; RV64-NEXT:    vand.vx v24, v8, a0
; RV64-NEXT:    vsll.vi v24, v24, 8
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    li a2, 56
; RV64-NEXT:    vsll.vx v24, v8, a2
; RV64-NEXT:    lui a3, 16
; RV64-NEXT:    addiw a3, a3, -256
; RV64-NEXT:    vand.vx v0, v8, a3
; RV64-NEXT:    li a4, 40
; RV64-NEXT:    vsll.vx v0, v0, a4
; RV64-NEXT:    vor.vv v24, v24, v0
; RV64-NEXT:    vor.vv v16, v24, v16
; RV64-NEXT:    vsrl.vx v24, v8, a2
; RV64-NEXT:    vsrl.vx v0, v8, a4
; RV64-NEXT:    vand.vx v0, v0, a3
; RV64-NEXT:    vor.vv v24, v0, v24
; RV64-NEXT:    vsrl.vi v0, v8, 24
; RV64-NEXT:    vand.vx v0, v0, a1
; RV64-NEXT:    vsrl.vi v8, v8, 8
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v8, v0
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    ret
  %v = call <15 x i64> @llvm.vp.bswap.v15i64(<15 x i64> %va, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x i64> %v
}

declare <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64>, <16 x i1>, i32)

define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bswap_v16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 24
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a1, 1044480
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsll.vx v16, v8, a1, v0.t
; RV32-NEXT:    lui a2, 16
; RV32-NEXT:    addi a2, a2, -256
; RV32-NEXT:    vand.vx v24, v8, a2, v0.t
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsll.vx v24, v24, a3, v0.t
; RV32-NEXT:    vor.vv v16, v16, v24, v0.t
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    slli a4, a4, 4
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 16
; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    addi a4, sp, 8
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a4), zero
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    slli a4, a4, 3
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 16
; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vx v24, v8, a4, v0.t
; RV32-NEXT:    vsll.vi v24, v24, 24, v0.t
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
; RV32-NEXT:    vsll.vi v16, v24, 8, v0.t
; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v16, v24, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v16, v24, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT:    vsrl.vx v24, v8, a3, v0.t
; RV32-NEXT:    vand.vx v24, v24, a2, v0.t
; RV32-NEXT:    vor.vv v16, v24, v16, v0.t
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vi v24, v8, 24, v0.t
; RV32-NEXT:    vand.vx v24, v24, a4, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v24, v0.t
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 24
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_bswap_v16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    sub sp, sp, a1
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
; RV64-NEXT:    vsll.vi v16, v16, 24, v0.t
; RV64-NEXT:    li a0, 255
; RV64-NEXT:    slli a0, a0, 24
; RV64-NEXT:    vand.vx v24, v8, a0, v0.t
; RV64-NEXT:    vsll.vi v24, v24, 8, v0.t
; RV64-NEXT:    vor.vv v16, v16, v24, v0.t
; RV64-NEXT:    addi a2, sp, 16
; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV64-NEXT:    li a2, 56
; RV64-NEXT:    vsll.vx v24, v8, a2, v0.t
; RV64-NEXT:    lui a3, 16
; RV64-NEXT:    addiw a3, a3, -256
; RV64-NEXT:    li a4, 40
; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
; RV64-NEXT:    vsll.vx v16, v16, a4, v0.t
; RV64-NEXT:    vor.vv v16, v24, v16, v0.t
; RV64-NEXT:    addi a5, sp, 16
; RV64-NEXT:    vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV64-NEXT:    vor.vv v16, v16, v24, v0.t
; RV64-NEXT:    vs8r.v v16, (a5) # Unknown-size Folded Spill
; RV64-NEXT:    vsrl.vx v24, v8, a2, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a4, v0.t
; RV64-NEXT:    vand.vx v16, v16, a3, v0.t
; RV64-NEXT:    vor.vv v24, v16, v24, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 24, v0.t
; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v24, v0.t
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i64> %v
}

define <16 x i64> @vp_bswap_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bswap_v16i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a1, 1044480
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsll.vx v16, v8, a1
; RV32-NEXT:    lui a2, 16
; RV32-NEXT:    addi a2, a2, -256
; RV32-NEXT:    vand.vx v24, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsll.vx v24, v24, a3
; RV32-NEXT:    vor.vv v16, v16, v24
; RV32-NEXT:    addi a4, sp, 16
; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    addi a4, sp, 8
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a4), zero
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vx v0, v8, a4
; RV32-NEXT:    vsll.vi v0, v0, 24
; RV32-NEXT:    vand.vv v24, v8, v16
; RV32-NEXT:    vsll.vi v24, v24, 8
; RV32-NEXT:    vor.vv v24, v0, v24
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v24, v0, v24
; RV32-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vx v0, v8, a3
; RV32-NEXT:    vand.vx v0, v0, a2
; RV32-NEXT:    vsrl.vx v24, v8, a1
; RV32-NEXT:    vor.vv v24, v0, v24
; RV32-NEXT:    vsrl.vi v0, v8, 8
; RV32-NEXT:    vand.vv v16, v0, v16
; RV32-NEXT:    vsrl.vi v8, v8, 24
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_bswap_v16i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsll.vi v16, v16, 24
; RV64-NEXT:    li a0, 255
; RV64-NEXT:    slli a0, a0, 24
; RV64-NEXT:    vand.vx v24, v8, a0
; RV64-NEXT:    vsll.vi v24, v24, 8
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    li a2, 56
; RV64-NEXT:    vsll.vx v24, v8, a2
; RV64-NEXT:    lui a3, 16
; RV64-NEXT:    addiw a3, a3, -256
; RV64-NEXT:    vand.vx v0, v8, a3
; RV64-NEXT:    li a4, 40
; RV64-NEXT:    vsll.vx v0, v0, a4
; RV64-NEXT:    vor.vv v24, v24, v0
; RV64-NEXT:    vor.vv v16, v24, v16
; RV64-NEXT:    vsrl.vx v24, v8, a2
; RV64-NEXT:    vsrl.vx v0, v8, a4
; RV64-NEXT:    vand.vx v0, v0, a3
; RV64-NEXT:    vor.vv v24, v0, v24
; RV64-NEXT:    vsrl.vi v0, v8, 24
; RV64-NEXT:    vand.vx v0, v0, a1
; RV64-NEXT:    vsrl.vi v8, v8, 8
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v8, v0
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i64> %v
}
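
; 128 x i16 is split into two halves of at most 64 elements; the masked form
; slides the upper mask bits down (vslidedown.vi v24, v0, 8) for the second
; half.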

declare <128 x i16> @llvm.vp.bswap.v128i16(<128 x i16>, <128 x i1>, i32)

define <128 x i16> @vp_bswap_v128i16(<128 x i16> %va, <128 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v128i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v24, v0, 8
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    bltu a0, a2, .LBB26_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:  .LBB26_2:
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a1, a0, -64
; CHECK-NEXT:    sltu a0, a0, a1
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    and a0, a0, a1
; CHECK-NEXT:    vmv1r.v v0, v24
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v16, v8, v16, v0.t
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %v = call <128 x i16> @llvm.vp.bswap.v128i16(<128 x i16> %va, <128 x i1> %m, i32 %evl)
  ret <128 x i16> %v
}

define <128 x i16> @vp_bswap_v128i16_unmasked(<128 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v128i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    bltu a0, a2, .LBB27_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:  .LBB27_2:
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v24, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v24
; CHECK-NEXT:    addi a1, a0, -64
; CHECK-NEXT:    sltu a0, a0, a1
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    and a0, a0, a1
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v24, v16, 8
; CHECK-NEXT:    vsll.vi v16, v16, 8
; CHECK-NEXT:    vor.vv v16, v16, v24
; CHECK-NEXT:    ret
  %v = call <128 x i16> @llvm.vp.bswap.v128i16(<128 x i16> %va, <128 x i1> splat (i1 true), i32 %evl)
  ret <128 x i16> %v
}