1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
4 ; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
6 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb,+m -target-abi=ilp32d \
7 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB
8 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvkb,+m -target-abi=lp64d \
9 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB
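; Without Zvkb, each llvm.vp.bswap.* call below is expanded into vector
; shifts, masks, and ors; with +zvkb the same operation is selected as a
; single vrev8.v that keeps the original mask (v0.t) and EVL.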
11 declare <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
13 define <vscale x 1 x i16> @vp_bswap_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
14 ; CHECK-LABEL: vp_bswap_nxv1i16:
15 ; CHECK: # %bb.0:
16 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
17 ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
18 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
19 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
20 ; CHECK-NEXT: ret
21 ;
22 ; CHECK-ZVKB-LABEL: vp_bswap_nxv1i16:
23 ; CHECK-ZVKB: # %bb.0:
24 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
25 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
26 ; CHECK-ZVKB-NEXT: ret
27 %v = call <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
28 ret <vscale x 1 x i16> %v
29 }
31 define <vscale x 1 x i16> @vp_bswap_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
32 ; CHECK-LABEL: vp_bswap_nxv1i16_unmasked:
33 ; CHECK: # %bb.0:
34 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
35 ; CHECK-NEXT: vsrl.vi v9, v8, 8
36 ; CHECK-NEXT: vsll.vi v8, v8, 8
37 ; CHECK-NEXT: vor.vv v8, v8, v9
38 ; CHECK-NEXT: ret
39 ;
40 ; CHECK-ZVKB-LABEL: vp_bswap_nxv1i16_unmasked:
41 ; CHECK-ZVKB: # %bb.0:
42 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
43 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
44 ; CHECK-ZVKB-NEXT: ret
45 %v = call <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
46 ret <vscale x 1 x i16> %v
47 }
49 declare <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
51 define <vscale x 2 x i16> @vp_bswap_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
52 ; CHECK-LABEL: vp_bswap_nxv2i16:
53 ; CHECK: # %bb.0:
54 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
55 ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
56 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
57 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
58 ; CHECK-NEXT: ret
59 ;
60 ; CHECK-ZVKB-LABEL: vp_bswap_nxv2i16:
61 ; CHECK-ZVKB: # %bb.0:
62 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
63 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
64 ; CHECK-ZVKB-NEXT: ret
65 %v = call <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
66 ret <vscale x 2 x i16> %v
67 }
69 define <vscale x 2 x i16> @vp_bswap_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
70 ; CHECK-LABEL: vp_bswap_nxv2i16_unmasked:
71 ; CHECK: # %bb.0:
72 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
73 ; CHECK-NEXT: vsrl.vi v9, v8, 8
74 ; CHECK-NEXT: vsll.vi v8, v8, 8
75 ; CHECK-NEXT: vor.vv v8, v8, v9
76 ; CHECK-NEXT: ret
77 ;
78 ; CHECK-ZVKB-LABEL: vp_bswap_nxv2i16_unmasked:
79 ; CHECK-ZVKB: # %bb.0:
80 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
81 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
82 ; CHECK-ZVKB-NEXT: ret
83 %v = call <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
84 ret <vscale x 2 x i16> %v
85 }
87 declare <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
89 define <vscale x 4 x i16> @vp_bswap_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
90 ; CHECK-LABEL: vp_bswap_nxv4i16:
91 ; CHECK: # %bb.0:
92 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
93 ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
94 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
95 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
96 ; CHECK-NEXT: ret
97 ;
98 ; CHECK-ZVKB-LABEL: vp_bswap_nxv4i16:
99 ; CHECK-ZVKB: # %bb.0:
100 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
101 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
102 ; CHECK-ZVKB-NEXT: ret
103 %v = call <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
104 ret <vscale x 4 x i16> %v
105 }
107 define <vscale x 4 x i16> @vp_bswap_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
108 ; CHECK-LABEL: vp_bswap_nxv4i16_unmasked:
109 ; CHECK: # %bb.0:
110 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
111 ; CHECK-NEXT: vsrl.vi v9, v8, 8
112 ; CHECK-NEXT: vsll.vi v8, v8, 8
113 ; CHECK-NEXT: vor.vv v8, v8, v9
114 ; CHECK-NEXT: ret
115 ;
116 ; CHECK-ZVKB-LABEL: vp_bswap_nxv4i16_unmasked:
117 ; CHECK-ZVKB: # %bb.0:
118 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
119 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
120 ; CHECK-ZVKB-NEXT: ret
121 %v = call <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
122 ret <vscale x 4 x i16> %v
123 }
125 declare <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
127 define <vscale x 8 x i16> @vp_bswap_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
128 ; CHECK-LABEL: vp_bswap_nxv8i16:
129 ; CHECK: # %bb.0:
130 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
131 ; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t
132 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
133 ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
134 ; CHECK-NEXT: ret
135 ;
136 ; CHECK-ZVKB-LABEL: vp_bswap_nxv8i16:
137 ; CHECK-ZVKB: # %bb.0:
138 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
139 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
140 ; CHECK-ZVKB-NEXT: ret
141 %v = call <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl)
142 ret <vscale x 8 x i16> %v
143 }
145 define <vscale x 8 x i16> @vp_bswap_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
146 ; CHECK-LABEL: vp_bswap_nxv8i16_unmasked:
147 ; CHECK: # %bb.0:
148 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
149 ; CHECK-NEXT: vsrl.vi v10, v8, 8
150 ; CHECK-NEXT: vsll.vi v8, v8, 8
151 ; CHECK-NEXT: vor.vv v8, v8, v10
152 ; CHECK-NEXT: ret
153 ;
154 ; CHECK-ZVKB-LABEL: vp_bswap_nxv8i16_unmasked:
155 ; CHECK-ZVKB: # %bb.0:
156 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
157 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
158 ; CHECK-ZVKB-NEXT: ret
159 %v = call <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
160 ret <vscale x 8 x i16> %v
161 }
163 declare <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
165 define <vscale x 16 x i16> @vp_bswap_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
166 ; CHECK-LABEL: vp_bswap_nxv16i16:
167 ; CHECK: # %bb.0:
168 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
169 ; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t
170 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
171 ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
172 ; CHECK-NEXT: ret
173 ;
174 ; CHECK-ZVKB-LABEL: vp_bswap_nxv16i16:
175 ; CHECK-ZVKB: # %bb.0:
176 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m4, ta, ma
177 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
178 ; CHECK-ZVKB-NEXT: ret
179 %v = call <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl)
180 ret <vscale x 16 x i16> %v
181 }
183 define <vscale x 16 x i16> @vp_bswap_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
184 ; CHECK-LABEL: vp_bswap_nxv16i16_unmasked:
185 ; CHECK: # %bb.0:
186 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
187 ; CHECK-NEXT: vsrl.vi v12, v8, 8
188 ; CHECK-NEXT: vsll.vi v8, v8, 8
189 ; CHECK-NEXT: vor.vv v8, v8, v12
190 ; CHECK-NEXT: ret
191 ;
192 ; CHECK-ZVKB-LABEL: vp_bswap_nxv16i16_unmasked:
193 ; CHECK-ZVKB: # %bb.0:
194 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m4, ta, ma
195 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
196 ; CHECK-ZVKB-NEXT: ret
197 %v = call <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
198 ret <vscale x 16 x i16> %v
199 }
201 declare <vscale x 32 x i16> @llvm.vp.bswap.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i1>, i32)
203 define <vscale x 32 x i16> @vp_bswap_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
204 ; CHECK-LABEL: vp_bswap_nxv32i16:
205 ; CHECK: # %bb.0:
206 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
207 ; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
208 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
209 ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
210 ; CHECK-NEXT: ret
211 ;
212 ; CHECK-ZVKB-LABEL: vp_bswap_nxv32i16:
213 ; CHECK-ZVKB: # %bb.0:
214 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
215 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
216 ; CHECK-ZVKB-NEXT: ret
217 %v = call <vscale x 32 x i16> @llvm.vp.bswap.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
218 ret <vscale x 32 x i16> %v
219 }
221 define <vscale x 32 x i16> @vp_bswap_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
222 ; CHECK-LABEL: vp_bswap_nxv32i16_unmasked:
223 ; CHECK: # %bb.0:
224 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
225 ; CHECK-NEXT: vsrl.vi v16, v8, 8
226 ; CHECK-NEXT: vsll.vi v8, v8, 8
227 ; CHECK-NEXT: vor.vv v8, v8, v16
228 ; CHECK-NEXT: ret
229 ;
230 ; CHECK-ZVKB-LABEL: vp_bswap_nxv32i16_unmasked:
231 ; CHECK-ZVKB: # %bb.0:
232 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
233 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
234 ; CHECK-ZVKB-NEXT: ret
235 %v = call <vscale x 32 x i16> @llvm.vp.bswap.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
236 ret <vscale x 32 x i16> %v
237 }
239 declare <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
241 define <vscale x 1 x i32> @vp_bswap_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
242 ; CHECK-LABEL: vp_bswap_nxv1i32:
243 ; CHECK: # %bb.0:
244 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
245 ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
246 ; CHECK-NEXT: lui a0, 16
247 ; CHECK-NEXT: addi a0, a0, -256
248 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
249 ; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t
250 ; CHECK-NEXT: vor.vv v9, v9, v10, v0.t
251 ; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
252 ; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t
253 ; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
254 ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
255 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
256 ; CHECK-NEXT: ret
257 ;
258 ; CHECK-ZVKB-LABEL: vp_bswap_nxv1i32:
259 ; CHECK-ZVKB: # %bb.0:
260 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
261 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
262 ; CHECK-ZVKB-NEXT: ret
263 %v = call <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl)
264 ret <vscale x 1 x i32> %v
265 }
267 define <vscale x 1 x i32> @vp_bswap_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
268 ; CHECK-LABEL: vp_bswap_nxv1i32_unmasked:
269 ; CHECK: # %bb.0:
270 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
271 ; CHECK-NEXT: vsrl.vi v9, v8, 8
272 ; CHECK-NEXT: lui a0, 16
273 ; CHECK-NEXT: vsrl.vi v10, v8, 24
274 ; CHECK-NEXT: addi a0, a0, -256
275 ; CHECK-NEXT: vand.vx v9, v9, a0
276 ; CHECK-NEXT: vor.vv v9, v9, v10
277 ; CHECK-NEXT: vand.vx v10, v8, a0
278 ; CHECK-NEXT: vsll.vi v10, v10, 8
279 ; CHECK-NEXT: vsll.vi v8, v8, 24
280 ; CHECK-NEXT: vor.vv v8, v8, v10
281 ; CHECK-NEXT: vor.vv v8, v8, v9
282 ; CHECK-NEXT: ret
283 ;
284 ; CHECK-ZVKB-LABEL: vp_bswap_nxv1i32_unmasked:
285 ; CHECK-ZVKB: # %bb.0:
286 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
287 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
288 ; CHECK-ZVKB-NEXT: ret
289 %v = call <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
290 ret <vscale x 1 x i32> %v
291 }
293 declare <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
295 define <vscale x 2 x i32> @vp_bswap_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
296 ; CHECK-LABEL: vp_bswap_nxv2i32:
297 ; CHECK: # %bb.0:
298 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
299 ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
300 ; CHECK-NEXT: lui a0, 16
301 ; CHECK-NEXT: addi a0, a0, -256
302 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
303 ; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t
304 ; CHECK-NEXT: vor.vv v9, v9, v10, v0.t
305 ; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
306 ; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t
307 ; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
308 ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
309 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
310 ; CHECK-NEXT: ret
311 ;
312 ; CHECK-ZVKB-LABEL: vp_bswap_nxv2i32:
313 ; CHECK-ZVKB: # %bb.0:
314 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
315 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
316 ; CHECK-ZVKB-NEXT: ret
317 %v = call <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
318 ret <vscale x 2 x i32> %v
319 }
321 define <vscale x 2 x i32> @vp_bswap_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
322 ; CHECK-LABEL: vp_bswap_nxv2i32_unmasked:
323 ; CHECK: # %bb.0:
324 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
325 ; CHECK-NEXT: vsrl.vi v9, v8, 8
326 ; CHECK-NEXT: lui a0, 16
327 ; CHECK-NEXT: vsrl.vi v10, v8, 24
328 ; CHECK-NEXT: addi a0, a0, -256
329 ; CHECK-NEXT: vand.vx v9, v9, a0
330 ; CHECK-NEXT: vor.vv v9, v9, v10
331 ; CHECK-NEXT: vand.vx v10, v8, a0
332 ; CHECK-NEXT: vsll.vi v10, v10, 8
333 ; CHECK-NEXT: vsll.vi v8, v8, 24
334 ; CHECK-NEXT: vor.vv v8, v8, v10
335 ; CHECK-NEXT: vor.vv v8, v8, v9
336 ; CHECK-NEXT: ret
337 ;
338 ; CHECK-ZVKB-LABEL: vp_bswap_nxv2i32_unmasked:
339 ; CHECK-ZVKB: # %bb.0:
340 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
341 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
342 ; CHECK-ZVKB-NEXT: ret
343 %v = call <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
344 ret <vscale x 2 x i32> %v
345 }
347 declare <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
349 define <vscale x 4 x i32> @vp_bswap_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
350 ; CHECK-LABEL: vp_bswap_nxv4i32:
351 ; CHECK: # %bb.0:
352 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
353 ; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t
354 ; CHECK-NEXT: lui a0, 16
355 ; CHECK-NEXT: addi a0, a0, -256
356 ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
357 ; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t
358 ; CHECK-NEXT: vor.vv v10, v10, v12, v0.t
359 ; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
360 ; CHECK-NEXT: vsll.vi v12, v12, 8, v0.t
361 ; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
362 ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
363 ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
364 ; CHECK-NEXT: ret
365 ;
366 ; CHECK-ZVKB-LABEL: vp_bswap_nxv4i32:
367 ; CHECK-ZVKB: # %bb.0:
368 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m2, ta, ma
369 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
370 ; CHECK-ZVKB-NEXT: ret
371 %v = call <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
372 ret <vscale x 4 x i32> %v
373 }
375 define <vscale x 4 x i32> @vp_bswap_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
376 ; CHECK-LABEL: vp_bswap_nxv4i32_unmasked:
377 ; CHECK: # %bb.0:
378 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
379 ; CHECK-NEXT: vsrl.vi v10, v8, 8
380 ; CHECK-NEXT: lui a0, 16
381 ; CHECK-NEXT: vsrl.vi v12, v8, 24
382 ; CHECK-NEXT: addi a0, a0, -256
383 ; CHECK-NEXT: vand.vx v10, v10, a0
384 ; CHECK-NEXT: vor.vv v10, v10, v12
385 ; CHECK-NEXT: vand.vx v12, v8, a0
386 ; CHECK-NEXT: vsll.vi v12, v12, 8
387 ; CHECK-NEXT: vsll.vi v8, v8, 24
388 ; CHECK-NEXT: vor.vv v8, v8, v12
389 ; CHECK-NEXT: vor.vv v8, v8, v10
390 ; CHECK-NEXT: ret
391 ;
392 ; CHECK-ZVKB-LABEL: vp_bswap_nxv4i32_unmasked:
393 ; CHECK-ZVKB: # %bb.0:
394 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m2, ta, ma
395 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
396 ; CHECK-ZVKB-NEXT: ret
397 %v = call <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
398 ret <vscale x 4 x i32> %v
399 }
401 declare <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
403 define <vscale x 8 x i32> @vp_bswap_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
404 ; CHECK-LABEL: vp_bswap_nxv8i32:
405 ; CHECK: # %bb.0:
406 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
407 ; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t
408 ; CHECK-NEXT: lui a0, 16
409 ; CHECK-NEXT: addi a0, a0, -256
410 ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
411 ; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t
412 ; CHECK-NEXT: vor.vv v12, v12, v16, v0.t
413 ; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
414 ; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t
415 ; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
416 ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
417 ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
418 ; CHECK-NEXT: ret
419 ;
420 ; CHECK-ZVKB-LABEL: vp_bswap_nxv8i32:
421 ; CHECK-ZVKB: # %bb.0:
422 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m4, ta, ma
423 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
424 ; CHECK-ZVKB-NEXT: ret
425 %v = call <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
426 ret <vscale x 8 x i32> %v
427 }
429 define <vscale x 8 x i32> @vp_bswap_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
430 ; CHECK-LABEL: vp_bswap_nxv8i32_unmasked:
431 ; CHECK: # %bb.0:
432 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
433 ; CHECK-NEXT: vsrl.vi v12, v8, 8
434 ; CHECK-NEXT: lui a0, 16
435 ; CHECK-NEXT: vsrl.vi v16, v8, 24
436 ; CHECK-NEXT: addi a0, a0, -256
437 ; CHECK-NEXT: vand.vx v12, v12, a0
438 ; CHECK-NEXT: vor.vv v12, v12, v16
439 ; CHECK-NEXT: vand.vx v16, v8, a0
440 ; CHECK-NEXT: vsll.vi v16, v16, 8
441 ; CHECK-NEXT: vsll.vi v8, v8, 24
442 ; CHECK-NEXT: vor.vv v8, v8, v16
443 ; CHECK-NEXT: vor.vv v8, v8, v12
444 ; CHECK-NEXT: ret
445 ;
446 ; CHECK-ZVKB-LABEL: vp_bswap_nxv8i32_unmasked:
447 ; CHECK-ZVKB: # %bb.0:
448 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m4, ta, ma
449 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
450 ; CHECK-ZVKB-NEXT: ret
451 %v = call <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
452 ret <vscale x 8 x i32> %v
453 }
455 declare <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
457 define <vscale x 16 x i32> @vp_bswap_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
458 ; CHECK-LABEL: vp_bswap_nxv16i32:
459 ; CHECK: # %bb.0:
460 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
461 ; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
462 ; CHECK-NEXT: lui a0, 16
463 ; CHECK-NEXT: addi a0, a0, -256
464 ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
465 ; CHECK-NEXT: vsrl.vi v24, v8, 24, v0.t
466 ; CHECK-NEXT: vor.vv v16, v16, v24, v0.t
467 ; CHECK-NEXT: vand.vx v24, v8, a0, v0.t
468 ; CHECK-NEXT: vsll.vi v24, v24, 8, v0.t
469 ; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
470 ; CHECK-NEXT: vor.vv v8, v8, v24, v0.t
471 ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
472 ; CHECK-NEXT: ret
473 ;
474 ; CHECK-ZVKB-LABEL: vp_bswap_nxv16i32:
475 ; CHECK-ZVKB: # %bb.0:
476 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m8, ta, ma
477 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
478 ; CHECK-ZVKB-NEXT: ret
479 %v = call <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
480 ret <vscale x 16 x i32> %v
481 }
483 define <vscale x 16 x i32> @vp_bswap_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
484 ; CHECK-LABEL: vp_bswap_nxv16i32_unmasked:
485 ; CHECK: # %bb.0:
486 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
487 ; CHECK-NEXT: vsrl.vi v16, v8, 8
488 ; CHECK-NEXT: lui a0, 16
489 ; CHECK-NEXT: vsrl.vi v24, v8, 24
490 ; CHECK-NEXT: addi a0, a0, -256
491 ; CHECK-NEXT: vand.vx v16, v16, a0
492 ; CHECK-NEXT: vor.vv v16, v16, v24
493 ; CHECK-NEXT: vand.vx v24, v8, a0
494 ; CHECK-NEXT: vsll.vi v24, v24, 8
495 ; CHECK-NEXT: vsll.vi v8, v8, 24
496 ; CHECK-NEXT: vor.vv v8, v8, v24
497 ; CHECK-NEXT: vor.vv v8, v8, v16
498 ; CHECK-NEXT: ret
499 ;
500 ; CHECK-ZVKB-LABEL: vp_bswap_nxv16i32_unmasked:
501 ; CHECK-ZVKB: # %bb.0:
502 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m8, ta, ma
503 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
504 ; CHECK-ZVKB-NEXT: ret
505 %v = call <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
506 ret <vscale x 16 x i32> %v
507 }
509 declare <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
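; For the i64 element types below, RV32 cannot materialize the 64-bit byte
; masks in a single scalar register, so one mask is built on the stack
; (stores of 0xff000000 and 0) and broadcast with a zero-strided vlse64.v,
; while RV64 builds the masks directly with lui/li/slli/addiw.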
511 define <vscale x 1 x i64> @vp_bswap_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
512 ; RV32-LABEL: vp_bswap_nxv1i64:
513 ; RV32: # %bb.0:
514 ; RV32-NEXT: addi sp, sp, -16
515 ; RV32-NEXT: .cfi_def_cfa_offset 16
516 ; RV32-NEXT: lui a1, 1044480
517 ; RV32-NEXT: li a2, 56
518 ; RV32-NEXT: lui a3, 16
519 ; RV32-NEXT: li a4, 40
520 ; RV32-NEXT: lui a5, 4080
521 ; RV32-NEXT: addi a6, sp, 8
522 ; RV32-NEXT: sw a1, 8(sp)
523 ; RV32-NEXT: sw zero, 12(sp)
524 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
525 ; RV32-NEXT: vsll.vx v9, v8, a2, v0.t
526 ; RV32-NEXT: addi a0, a3, -256
527 ; RV32-NEXT: vand.vx v10, v8, a0, v0.t
528 ; RV32-NEXT: vlse64.v v11, (a6), zero
529 ; RV32-NEXT: vsll.vx v10, v10, a4, v0.t
530 ; RV32-NEXT: vor.vv v9, v9, v10, v0.t
531 ; RV32-NEXT: vand.vx v10, v8, a5, v0.t
532 ; RV32-NEXT: vsll.vi v10, v10, 24, v0.t
533 ; RV32-NEXT: vand.vv v12, v8, v11, v0.t
534 ; RV32-NEXT: vsll.vi v12, v12, 8, v0.t
535 ; RV32-NEXT: vor.vv v10, v10, v12, v0.t
536 ; RV32-NEXT: vor.vv v9, v9, v10, v0.t
537 ; RV32-NEXT: vsrl.vx v10, v8, a2, v0.t
538 ; RV32-NEXT: vsrl.vx v12, v8, a4, v0.t
539 ; RV32-NEXT: vand.vx v12, v12, a0, v0.t
540 ; RV32-NEXT: vor.vv v10, v12, v10, v0.t
541 ; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t
542 ; RV32-NEXT: vand.vx v12, v12, a5, v0.t
543 ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
544 ; RV32-NEXT: vand.vv v8, v8, v11, v0.t
545 ; RV32-NEXT: vor.vv v8, v8, v12, v0.t
546 ; RV32-NEXT: vor.vv v8, v8, v10, v0.t
547 ; RV32-NEXT: vor.vv v8, v9, v8, v0.t
548 ; RV32-NEXT: addi sp, sp, 16
549 ; RV32-NEXT: .cfi_def_cfa_offset 0
550 ; RV32-NEXT: ret
551 ;
552 ; RV64-LABEL: vp_bswap_nxv1i64:
553 ; RV64: # %bb.0:
554 ; RV64-NEXT: lui a1, 4080
555 ; RV64-NEXT: li a2, 255
556 ; RV64-NEXT: li a3, 56
557 ; RV64-NEXT: lui a4, 16
558 ; RV64-NEXT: li a5, 40
559 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
560 ; RV64-NEXT: vand.vx v9, v8, a1, v0.t
561 ; RV64-NEXT: slli a2, a2, 24
562 ; RV64-NEXT: addiw a0, a4, -256
563 ; RV64-NEXT: vsll.vi v9, v9, 24, v0.t
564 ; RV64-NEXT: vand.vx v10, v8, a2, v0.t
565 ; RV64-NEXT: vsll.vi v10, v10, 8, v0.t
566 ; RV64-NEXT: vor.vv v9, v9, v10, v0.t
567 ; RV64-NEXT: vsll.vx v10, v8, a3, v0.t
568 ; RV64-NEXT: vand.vx v11, v8, a0, v0.t
569 ; RV64-NEXT: vsll.vx v11, v11, a5, v0.t
570 ; RV64-NEXT: vor.vv v10, v10, v11, v0.t
571 ; RV64-NEXT: vor.vv v9, v10, v9, v0.t
572 ; RV64-NEXT: vsrl.vx v10, v8, a3, v0.t
573 ; RV64-NEXT: vsrl.vx v11, v8, a5, v0.t
574 ; RV64-NEXT: vand.vx v11, v11, a0, v0.t
575 ; RV64-NEXT: vor.vv v10, v11, v10, v0.t
576 ; RV64-NEXT: vsrl.vi v11, v8, 24, v0.t
577 ; RV64-NEXT: vand.vx v11, v11, a1, v0.t
578 ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
579 ; RV64-NEXT: vand.vx v8, v8, a2, v0.t
580 ; RV64-NEXT: vor.vv v8, v8, v11, v0.t
581 ; RV64-NEXT: vor.vv v8, v8, v10, v0.t
582 ; RV64-NEXT: vor.vv v8, v9, v8, v0.t
583 ; RV64-NEXT: ret
584 ;
585 ; CHECK-ZVKB-LABEL: vp_bswap_nxv1i64:
586 ; CHECK-ZVKB: # %bb.0:
587 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
588 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
589 ; CHECK-ZVKB-NEXT: ret
590 %v = call <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl)
591 ret <vscale x 1 x i64> %v
592 }
594 define <vscale x 1 x i64> @vp_bswap_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
595 ; RV32-LABEL: vp_bswap_nxv1i64_unmasked:
596 ; RV32: # %bb.0:
597 ; RV32-NEXT: addi sp, sp, -16
598 ; RV32-NEXT: .cfi_def_cfa_offset 16
599 ; RV32-NEXT: lui a1, 1044480
600 ; RV32-NEXT: li a2, 56
601 ; RV32-NEXT: lui a3, 16
602 ; RV32-NEXT: li a4, 40
603 ; RV32-NEXT: lui a5, 4080
604 ; RV32-NEXT: addi a6, sp, 8
605 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
606 ; RV32-NEXT: vsrl.vi v9, v8, 24
607 ; RV32-NEXT: sw a1, 8(sp)
608 ; RV32-NEXT: sw zero, 12(sp)
609 ; RV32-NEXT: vsll.vx v10, v8, a2
610 ; RV32-NEXT: addi a0, a3, -256
611 ; RV32-NEXT: vsrl.vx v11, v8, a2
612 ; RV32-NEXT: vsrl.vx v12, v8, a4
613 ; RV32-NEXT: vand.vx v13, v8, a0
614 ; RV32-NEXT: vand.vx v12, v12, a0
615 ; RV32-NEXT: vor.vv v11, v12, v11
616 ; RV32-NEXT: vlse64.v v12, (a6), zero
617 ; RV32-NEXT: vsll.vx v13, v13, a4
618 ; RV32-NEXT: vor.vv v10, v10, v13
619 ; RV32-NEXT: vsrl.vi v13, v8, 8
620 ; RV32-NEXT: vand.vx v9, v9, a5
621 ; RV32-NEXT: vand.vv v13, v13, v12
622 ; RV32-NEXT: vor.vv v9, v13, v9
623 ; RV32-NEXT: vand.vv v12, v8, v12
624 ; RV32-NEXT: vand.vx v8, v8, a5
625 ; RV32-NEXT: vsll.vi v8, v8, 24
626 ; RV32-NEXT: vsll.vi v12, v12, 8
627 ; RV32-NEXT: vor.vv v8, v8, v12
628 ; RV32-NEXT: vor.vv v8, v10, v8
629 ; RV32-NEXT: vor.vv v9, v9, v11
630 ; RV32-NEXT: vor.vv v8, v8, v9
631 ; RV32-NEXT: addi sp, sp, 16
632 ; RV32-NEXT: .cfi_def_cfa_offset 0
633 ; RV32-NEXT: ret
634 ;
635 ; RV64-LABEL: vp_bswap_nxv1i64_unmasked:
636 ; RV64: # %bb.0:
637 ; RV64-NEXT: lui a1, 4080
638 ; RV64-NEXT: li a2, 255
639 ; RV64-NEXT: li a3, 56
640 ; RV64-NEXT: lui a4, 16
641 ; RV64-NEXT: li a5, 40
642 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
643 ; RV64-NEXT: vsrl.vi v9, v8, 24
644 ; RV64-NEXT: vsrl.vi v10, v8, 8
645 ; RV64-NEXT: addiw a0, a4, -256
646 ; RV64-NEXT: vsrl.vx v11, v8, a3
647 ; RV64-NEXT: vsrl.vx v12, v8, a5
648 ; RV64-NEXT: vand.vx v12, v12, a0
649 ; RV64-NEXT: vor.vv v11, v12, v11
650 ; RV64-NEXT: vand.vx v12, v8, a1
651 ; RV64-NEXT: slli a2, a2, 24
652 ; RV64-NEXT: vand.vx v9, v9, a1
653 ; RV64-NEXT: vsll.vi v12, v12, 24
654 ; RV64-NEXT: vand.vx v10, v10, a2
655 ; RV64-NEXT: vor.vv v9, v10, v9
656 ; RV64-NEXT: vand.vx v10, v8, a2
657 ; RV64-NEXT: vsll.vi v10, v10, 8
658 ; RV64-NEXT: vor.vv v10, v12, v10
659 ; RV64-NEXT: vsll.vx v12, v8, a3
660 ; RV64-NEXT: vand.vx v8, v8, a0
661 ; RV64-NEXT: vsll.vx v8, v8, a5
662 ; RV64-NEXT: vor.vv v8, v12, v8
663 ; RV64-NEXT: vor.vv v8, v8, v10
664 ; RV64-NEXT: vor.vv v9, v9, v11
665 ; RV64-NEXT: vor.vv v8, v8, v9
666 ; RV64-NEXT: ret
667 ;
668 ; CHECK-ZVKB-LABEL: vp_bswap_nxv1i64_unmasked:
669 ; CHECK-ZVKB: # %bb.0:
670 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
671 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
672 ; CHECK-ZVKB-NEXT: ret
673 %v = call <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
674 ret <vscale x 1 x i64> %v
675 }
677 declare <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
679 define <vscale x 2 x i64> @vp_bswap_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
680 ; RV32-LABEL: vp_bswap_nxv2i64:
681 ; RV32: # %bb.0:
682 ; RV32-NEXT: addi sp, sp, -16
683 ; RV32-NEXT: .cfi_def_cfa_offset 16
684 ; RV32-NEXT: lui a1, 1044480
685 ; RV32-NEXT: li a2, 56
686 ; RV32-NEXT: lui a3, 16
687 ; RV32-NEXT: li a4, 40
688 ; RV32-NEXT: lui a5, 4080
689 ; RV32-NEXT: addi a6, sp, 8
690 ; RV32-NEXT: sw a1, 8(sp)
691 ; RV32-NEXT: sw zero, 12(sp)
692 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
693 ; RV32-NEXT: vsll.vx v10, v8, a2, v0.t
694 ; RV32-NEXT: addi a0, a3, -256
695 ; RV32-NEXT: vand.vx v12, v8, a0, v0.t
696 ; RV32-NEXT: vlse64.v v14, (a6), zero
697 ; RV32-NEXT: vsll.vx v12, v12, a4, v0.t
698 ; RV32-NEXT: vor.vv v10, v10, v12, v0.t
699 ; RV32-NEXT: vand.vx v12, v8, a5, v0.t
700 ; RV32-NEXT: vsll.vi v12, v12, 24, v0.t
701 ; RV32-NEXT: vand.vv v16, v8, v14, v0.t
702 ; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
703 ; RV32-NEXT: vor.vv v12, v12, v16, v0.t
704 ; RV32-NEXT: vor.vv v10, v10, v12, v0.t
705 ; RV32-NEXT: vsrl.vx v12, v8, a2, v0.t
706 ; RV32-NEXT: vsrl.vx v16, v8, a4, v0.t
707 ; RV32-NEXT: vand.vx v16, v16, a0, v0.t
708 ; RV32-NEXT: vor.vv v12, v16, v12, v0.t
709 ; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t
710 ; RV32-NEXT: vand.vx v16, v16, a5, v0.t
711 ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
712 ; RV32-NEXT: vand.vv v8, v8, v14, v0.t
713 ; RV32-NEXT: vor.vv v8, v8, v16, v0.t
714 ; RV32-NEXT: vor.vv v8, v8, v12, v0.t
715 ; RV32-NEXT: vor.vv v8, v10, v8, v0.t
716 ; RV32-NEXT: addi sp, sp, 16
717 ; RV32-NEXT: .cfi_def_cfa_offset 0
718 ; RV32-NEXT: ret
719 ;
720 ; RV64-LABEL: vp_bswap_nxv2i64:
721 ; RV64: # %bb.0:
722 ; RV64-NEXT: lui a1, 4080
723 ; RV64-NEXT: li a2, 255
724 ; RV64-NEXT: li a3, 56
725 ; RV64-NEXT: lui a4, 16
726 ; RV64-NEXT: li a5, 40
727 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
728 ; RV64-NEXT: vand.vx v10, v8, a1, v0.t
729 ; RV64-NEXT: slli a2, a2, 24
730 ; RV64-NEXT: addiw a0, a4, -256
731 ; RV64-NEXT: vsll.vi v10, v10, 24, v0.t
732 ; RV64-NEXT: vand.vx v12, v8, a2, v0.t
733 ; RV64-NEXT: vsll.vi v12, v12, 8, v0.t
734 ; RV64-NEXT: vor.vv v10, v10, v12, v0.t
735 ; RV64-NEXT: vsll.vx v12, v8, a3, v0.t
736 ; RV64-NEXT: vand.vx v14, v8, a0, v0.t
737 ; RV64-NEXT: vsll.vx v14, v14, a5, v0.t
738 ; RV64-NEXT: vor.vv v12, v12, v14, v0.t
739 ; RV64-NEXT: vor.vv v10, v12, v10, v0.t
740 ; RV64-NEXT: vsrl.vx v12, v8, a3, v0.t
741 ; RV64-NEXT: vsrl.vx v14, v8, a5, v0.t
742 ; RV64-NEXT: vand.vx v14, v14, a0, v0.t
743 ; RV64-NEXT: vor.vv v12, v14, v12, v0.t
744 ; RV64-NEXT: vsrl.vi v14, v8, 24, v0.t
745 ; RV64-NEXT: vand.vx v14, v14, a1, v0.t
746 ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
747 ; RV64-NEXT: vand.vx v8, v8, a2, v0.t
748 ; RV64-NEXT: vor.vv v8, v8, v14, v0.t
749 ; RV64-NEXT: vor.vv v8, v8, v12, v0.t
750 ; RV64-NEXT: vor.vv v8, v10, v8, v0.t
751 ; RV64-NEXT: ret
752 ;
753 ; CHECK-ZVKB-LABEL: vp_bswap_nxv2i64:
754 ; CHECK-ZVKB: # %bb.0:
755 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m2, ta, ma
756 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
757 ; CHECK-ZVKB-NEXT: ret
758 %v = call <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
759 ret <vscale x 2 x i64> %v
760 }
762 define <vscale x 2 x i64> @vp_bswap_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
763 ; RV32-LABEL: vp_bswap_nxv2i64_unmasked:
764 ; RV32: # %bb.0:
765 ; RV32-NEXT: addi sp, sp, -16
766 ; RV32-NEXT: .cfi_def_cfa_offset 16
767 ; RV32-NEXT: lui a1, 1044480
768 ; RV32-NEXT: li a2, 56
769 ; RV32-NEXT: lui a3, 16
770 ; RV32-NEXT: li a4, 40
771 ; RV32-NEXT: lui a5, 4080
772 ; RV32-NEXT: addi a6, sp, 8
773 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
774 ; RV32-NEXT: vsrl.vi v10, v8, 24
775 ; RV32-NEXT: sw a1, 8(sp)
776 ; RV32-NEXT: sw zero, 12(sp)
777 ; RV32-NEXT: vsll.vx v12, v8, a2
778 ; RV32-NEXT: addi a0, a3, -256
779 ; RV32-NEXT: vsrl.vx v14, v8, a2
780 ; RV32-NEXT: vsrl.vx v16, v8, a4
781 ; RV32-NEXT: vand.vx v18, v8, a0
782 ; RV32-NEXT: vand.vx v16, v16, a0
783 ; RV32-NEXT: vor.vv v14, v16, v14
784 ; RV32-NEXT: vlse64.v v16, (a6), zero
785 ; RV32-NEXT: vsll.vx v18, v18, a4
786 ; RV32-NEXT: vor.vv v12, v12, v18
787 ; RV32-NEXT: vsrl.vi v18, v8, 8
788 ; RV32-NEXT: vand.vx v10, v10, a5
789 ; RV32-NEXT: vand.vv v18, v18, v16
790 ; RV32-NEXT: vor.vv v10, v18, v10
791 ; RV32-NEXT: vand.vv v16, v8, v16
792 ; RV32-NEXT: vand.vx v8, v8, a5
793 ; RV32-NEXT: vsll.vi v8, v8, 24
794 ; RV32-NEXT: vsll.vi v16, v16, 8
795 ; RV32-NEXT: vor.vv v8, v8, v16
796 ; RV32-NEXT: vor.vv v8, v12, v8
797 ; RV32-NEXT: vor.vv v10, v10, v14
798 ; RV32-NEXT: vor.vv v8, v8, v10
799 ; RV32-NEXT: addi sp, sp, 16
800 ; RV32-NEXT: .cfi_def_cfa_offset 0
801 ; RV32-NEXT: ret
802 ;
803 ; RV64-LABEL: vp_bswap_nxv2i64_unmasked:
804 ; RV64: # %bb.0:
805 ; RV64-NEXT: lui a1, 4080
806 ; RV64-NEXT: li a2, 255
807 ; RV64-NEXT: li a3, 56
808 ; RV64-NEXT: lui a4, 16
809 ; RV64-NEXT: li a5, 40
810 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
811 ; RV64-NEXT: vsrl.vi v10, v8, 24
812 ; RV64-NEXT: vsrl.vi v12, v8, 8
813 ; RV64-NEXT: addiw a0, a4, -256
814 ; RV64-NEXT: vsrl.vx v14, v8, a3
815 ; RV64-NEXT: vsrl.vx v16, v8, a5
816 ; RV64-NEXT: vand.vx v16, v16, a0
817 ; RV64-NEXT: vor.vv v14, v16, v14
818 ; RV64-NEXT: vand.vx v16, v8, a1
819 ; RV64-NEXT: slli a2, a2, 24
820 ; RV64-NEXT: vand.vx v10, v10, a1
821 ; RV64-NEXT: vsll.vi v16, v16, 24
822 ; RV64-NEXT: vand.vx v12, v12, a2
823 ; RV64-NEXT: vor.vv v10, v12, v10
824 ; RV64-NEXT: vand.vx v12, v8, a2
825 ; RV64-NEXT: vsll.vi v12, v12, 8
826 ; RV64-NEXT: vor.vv v12, v16, v12
827 ; RV64-NEXT: vsll.vx v16, v8, a3
828 ; RV64-NEXT: vand.vx v8, v8, a0
829 ; RV64-NEXT: vsll.vx v8, v8, a5
830 ; RV64-NEXT: vor.vv v8, v16, v8
831 ; RV64-NEXT: vor.vv v8, v8, v12
832 ; RV64-NEXT: vor.vv v10, v10, v14
833 ; RV64-NEXT: vor.vv v8, v8, v10
834 ; RV64-NEXT: ret
835 ;
836 ; CHECK-ZVKB-LABEL: vp_bswap_nxv2i64_unmasked:
837 ; CHECK-ZVKB: # %bb.0:
838 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m2, ta, ma
839 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
840 ; CHECK-ZVKB-NEXT: ret
841 %v = call <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
842 ret <vscale x 2 x i64> %v
843 }
845 declare <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
847 define <vscale x 4 x i64> @vp_bswap_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
848 ; RV32-LABEL: vp_bswap_nxv4i64:
849 ; RV32: # %bb.0:
850 ; RV32-NEXT: addi sp, sp, -16
851 ; RV32-NEXT: .cfi_def_cfa_offset 16
852 ; RV32-NEXT: lui a1, 1044480
853 ; RV32-NEXT: li a2, 56
854 ; RV32-NEXT: lui a3, 16
855 ; RV32-NEXT: li a4, 40
856 ; RV32-NEXT: lui a5, 4080
857 ; RV32-NEXT: addi a6, sp, 8
858 ; RV32-NEXT: sw a1, 8(sp)
859 ; RV32-NEXT: sw zero, 12(sp)
860 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
861 ; RV32-NEXT: vsll.vx v16, v8, a2, v0.t
862 ; RV32-NEXT: addi a0, a3, -256
863 ; RV32-NEXT: vand.vx v20, v8, a0, v0.t
864 ; RV32-NEXT: vlse64.v v12, (a6), zero
865 ; RV32-NEXT: vsll.vx v20, v20, a4, v0.t
866 ; RV32-NEXT: vor.vv v16, v16, v20, v0.t
867 ; RV32-NEXT: vand.vx v20, v8, a5, v0.t
868 ; RV32-NEXT: vsll.vi v20, v20, 24, v0.t
869 ; RV32-NEXT: vand.vv v24, v8, v12, v0.t
870 ; RV32-NEXT: vsll.vi v24, v24, 8, v0.t
871 ; RV32-NEXT: vor.vv v20, v20, v24, v0.t
872 ; RV32-NEXT: vor.vv v16, v16, v20, v0.t
873 ; RV32-NEXT: vsrl.vx v20, v8, a2, v0.t
874 ; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t
875 ; RV32-NEXT: vand.vx v24, v24, a0, v0.t
876 ; RV32-NEXT: vor.vv v20, v24, v20, v0.t
877 ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
878 ; RV32-NEXT: vand.vx v24, v24, a5, v0.t
879 ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
880 ; RV32-NEXT: vand.vv v8, v8, v12, v0.t
881 ; RV32-NEXT: vor.vv v8, v8, v24, v0.t
882 ; RV32-NEXT: vor.vv v8, v8, v20, v0.t
883 ; RV32-NEXT: vor.vv v8, v16, v8, v0.t
884 ; RV32-NEXT: addi sp, sp, 16
885 ; RV32-NEXT: .cfi_def_cfa_offset 0
886 ; RV32-NEXT: ret
887 ;
888 ; RV64-LABEL: vp_bswap_nxv4i64:
889 ; RV64: # %bb.0:
890 ; RV64-NEXT: lui a1, 4080
891 ; RV64-NEXT: li a2, 255
892 ; RV64-NEXT: li a3, 56
893 ; RV64-NEXT: lui a4, 16
894 ; RV64-NEXT: li a5, 40
895 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
896 ; RV64-NEXT: vand.vx v12, v8, a1, v0.t
897 ; RV64-NEXT: slli a2, a2, 24
898 ; RV64-NEXT: addiw a0, a4, -256
899 ; RV64-NEXT: vsll.vi v12, v12, 24, v0.t
900 ; RV64-NEXT: vand.vx v16, v8, a2, v0.t
901 ; RV64-NEXT: vsll.vi v16, v16, 8, v0.t
902 ; RV64-NEXT: vor.vv v12, v12, v16, v0.t
903 ; RV64-NEXT: vsll.vx v16, v8, a3, v0.t
904 ; RV64-NEXT: vand.vx v20, v8, a0, v0.t
905 ; RV64-NEXT: vsll.vx v20, v20, a5, v0.t
906 ; RV64-NEXT: vor.vv v16, v16, v20, v0.t
907 ; RV64-NEXT: vor.vv v12, v16, v12, v0.t
908 ; RV64-NEXT: vsrl.vx v16, v8, a3, v0.t
909 ; RV64-NEXT: vsrl.vx v20, v8, a5, v0.t
910 ; RV64-NEXT: vand.vx v20, v20, a0, v0.t
911 ; RV64-NEXT: vor.vv v16, v20, v16, v0.t
912 ; RV64-NEXT: vsrl.vi v20, v8, 24, v0.t
913 ; RV64-NEXT: vand.vx v20, v20, a1, v0.t
914 ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
915 ; RV64-NEXT: vand.vx v8, v8, a2, v0.t
916 ; RV64-NEXT: vor.vv v8, v8, v20, v0.t
917 ; RV64-NEXT: vor.vv v8, v8, v16, v0.t
918 ; RV64-NEXT: vor.vv v8, v12, v8, v0.t
919 ; RV64-NEXT: ret
920 ;
921 ; CHECK-ZVKB-LABEL: vp_bswap_nxv4i64:
922 ; CHECK-ZVKB: # %bb.0:
923 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m4, ta, ma
924 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
925 ; CHECK-ZVKB-NEXT: ret
926 %v = call <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl)
927 ret <vscale x 4 x i64> %v
928 }
930 define <vscale x 4 x i64> @vp_bswap_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
931 ; RV32-LABEL: vp_bswap_nxv4i64_unmasked:
932 ; RV32: # %bb.0:
933 ; RV32-NEXT: addi sp, sp, -16
934 ; RV32-NEXT: .cfi_def_cfa_offset 16
935 ; RV32-NEXT: lui a1, 1044480
936 ; RV32-NEXT: li a2, 56
937 ; RV32-NEXT: lui a3, 16
938 ; RV32-NEXT: li a4, 40
939 ; RV32-NEXT: lui a5, 4080
940 ; RV32-NEXT: addi a6, sp, 8
941 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
942 ; RV32-NEXT: vsrl.vi v12, v8, 24
943 ; RV32-NEXT: sw a1, 8(sp)
944 ; RV32-NEXT: sw zero, 12(sp)
945 ; RV32-NEXT: vsll.vx v16, v8, a2
946 ; RV32-NEXT: addi a0, a3, -256
947 ; RV32-NEXT: vsrl.vx v20, v8, a2
948 ; RV32-NEXT: vsrl.vx v24, v8, a4
949 ; RV32-NEXT: vand.vx v28, v8, a0
950 ; RV32-NEXT: vand.vx v24, v24, a0
951 ; RV32-NEXT: vor.vv v20, v24, v20
952 ; RV32-NEXT: vlse64.v v24, (a6), zero
953 ; RV32-NEXT: vsll.vx v28, v28, a4
954 ; RV32-NEXT: vor.vv v16, v16, v28
955 ; RV32-NEXT: vsrl.vi v28, v8, 8
956 ; RV32-NEXT: vand.vx v12, v12, a5
957 ; RV32-NEXT: vand.vv v28, v28, v24
958 ; RV32-NEXT: vor.vv v12, v28, v12
959 ; RV32-NEXT: vand.vv v24, v8, v24
960 ; RV32-NEXT: vand.vx v8, v8, a5
961 ; RV32-NEXT: vsll.vi v8, v8, 24
962 ; RV32-NEXT: vsll.vi v24, v24, 8
963 ; RV32-NEXT: vor.vv v8, v8, v24
964 ; RV32-NEXT: vor.vv v8, v16, v8
965 ; RV32-NEXT: vor.vv v12, v12, v20
966 ; RV32-NEXT: vor.vv v8, v8, v12
967 ; RV32-NEXT: addi sp, sp, 16
968 ; RV32-NEXT: .cfi_def_cfa_offset 0
969 ; RV32-NEXT: ret
970 ;
971 ; RV64-LABEL: vp_bswap_nxv4i64_unmasked:
972 ; RV64: # %bb.0:
973 ; RV64-NEXT: lui a1, 4080
974 ; RV64-NEXT: li a2, 255
975 ; RV64-NEXT: li a3, 56
976 ; RV64-NEXT: lui a4, 16
977 ; RV64-NEXT: li a5, 40
978 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
979 ; RV64-NEXT: vsrl.vi v12, v8, 24
980 ; RV64-NEXT: vsrl.vi v16, v8, 8
981 ; RV64-NEXT: addiw a0, a4, -256
982 ; RV64-NEXT: vsrl.vx v20, v8, a3
983 ; RV64-NEXT: vsrl.vx v24, v8, a5
984 ; RV64-NEXT: vand.vx v24, v24, a0
985 ; RV64-NEXT: vor.vv v20, v24, v20
986 ; RV64-NEXT: vand.vx v24, v8, a1
987 ; RV64-NEXT: slli a2, a2, 24
988 ; RV64-NEXT: vand.vx v12, v12, a1
989 ; RV64-NEXT: vsll.vi v24, v24, 24
990 ; RV64-NEXT: vand.vx v16, v16, a2
991 ; RV64-NEXT: vor.vv v12, v16, v12
992 ; RV64-NEXT: vand.vx v16, v8, a2
993 ; RV64-NEXT: vsll.vi v16, v16, 8
994 ; RV64-NEXT: vor.vv v16, v24, v16
995 ; RV64-NEXT: vsll.vx v24, v8, a3
996 ; RV64-NEXT: vand.vx v8, v8, a0
997 ; RV64-NEXT: vsll.vx v8, v8, a5
998 ; RV64-NEXT: vor.vv v8, v24, v8
999 ; RV64-NEXT: vor.vv v8, v8, v16
1000 ; RV64-NEXT: vor.vv v12, v12, v20
1001 ; RV64-NEXT: vor.vv v8, v8, v12
1002 ; RV64-NEXT: ret
1003 ;
1004 ; CHECK-ZVKB-LABEL: vp_bswap_nxv4i64_unmasked:
1005 ; CHECK-ZVKB: # %bb.0:
1006 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1007 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
1008 ; CHECK-ZVKB-NEXT: ret
1009 %v = call <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1010 ret <vscale x 4 x i64> %v
1011 }
1013 declare <vscale x 7 x i64> @llvm.vp.bswap.nxv7i64(<vscale x 7 x i64>, <vscale x 7 x i1>, i32)
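; nxv7i64 is a non-power-of-two element count; as the checks below show, it
; is lowered with the same m8 sequence as nxv8i64, including the folded
; spills/reloads needed to keep the intermediate m8 values live.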
1015 define <vscale x 7 x i64> @vp_bswap_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
1016 ; RV32-LABEL: vp_bswap_nxv7i64:
1017 ; RV32: # %bb.0:
1018 ; RV32-NEXT: addi sp, sp, -16
1019 ; RV32-NEXT: .cfi_def_cfa_offset 16
1020 ; RV32-NEXT: csrr a1, vlenb
1021 ; RV32-NEXT: li a2, 24
1022 ; RV32-NEXT: mul a1, a1, a2
1023 ; RV32-NEXT: sub sp, sp, a1
1024 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
1025 ; RV32-NEXT: lui a1, 1044480
1026 ; RV32-NEXT: li a2, 56
1027 ; RV32-NEXT: lui a3, 16
1028 ; RV32-NEXT: li a4, 40
1029 ; RV32-NEXT: addi a5, sp, 8
1030 ; RV32-NEXT: sw a1, 8(sp)
1031 ; RV32-NEXT: sw zero, 12(sp)
1032 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1033 ; RV32-NEXT: vsll.vx v16, v8, a2, v0.t
1034 ; RV32-NEXT: addi a0, a3, -256
1035 ; RV32-NEXT: vand.vx v24, v8, a0, v0.t
1036 ; RV32-NEXT: vsll.vx v24, v24, a4, v0.t
1037 ; RV32-NEXT: vor.vv v16, v16, v24, v0.t
1038 ; RV32-NEXT: csrr a1, vlenb
1039 ; RV32-NEXT: slli a1, a1, 4
1040 ; RV32-NEXT: add a1, sp, a1
1041 ; RV32-NEXT: addi a1, a1, 16
1042 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
1043 ; RV32-NEXT: vlse64.v v16, (a5), zero
1044 ; RV32-NEXT: csrr a1, vlenb
1045 ; RV32-NEXT: slli a1, a1, 3
1046 ; RV32-NEXT: add a1, sp, a1
1047 ; RV32-NEXT: addi a1, a1, 16
1048 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
1049 ; RV32-NEXT: lui a1, 4080
1050 ; RV32-NEXT: vand.vx v24, v8, a1, v0.t
1051 ; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
1052 ; RV32-NEXT: addi a3, sp, 16
1053 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
1054 ; RV32-NEXT: vand.vv v24, v8, v16, v0.t
1055 ; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
1056 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
1057 ; RV32-NEXT: vor.vv v16, v24, v16, v0.t
1058 ; RV32-NEXT: csrr a3, vlenb
1059 ; RV32-NEXT: slli a3, a3, 4
1060 ; RV32-NEXT: add a3, sp, a3
1061 ; RV32-NEXT: addi a3, a3, 16
1062 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
1063 ; RV32-NEXT: vor.vv v16, v24, v16, v0.t
1064 ; RV32-NEXT: csrr a3, vlenb
1065 ; RV32-NEXT: slli a3, a3, 4
1066 ; RV32-NEXT: add a3, sp, a3
1067 ; RV32-NEXT: addi a3, a3, 16
1068 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
1069 ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t
1070 ; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t
1071 ; RV32-NEXT: vand.vx v24, v24, a0, v0.t
1072 ; RV32-NEXT: vor.vv v16, v24, v16, v0.t
1073 ; RV32-NEXT: addi a0, sp, 16
1074 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
1075 ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
1076 ; RV32-NEXT: vand.vx v24, v24, a1, v0.t
1077 ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
1078 ; RV32-NEXT: csrr a0, vlenb
1079 ; RV32-NEXT: slli a0, a0, 3
1080 ; RV32-NEXT: add a0, sp, a0
1081 ; RV32-NEXT: addi a0, a0, 16
1082 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1083 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
1084 ; RV32-NEXT: vor.vv v8, v8, v24, v0.t
1085 ; RV32-NEXT: addi a0, sp, 16
1086 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1087 ; RV32-NEXT: vor.vv v8, v8, v16, v0.t
1088 ; RV32-NEXT: csrr a0, vlenb
1089 ; RV32-NEXT: slli a0, a0, 4
1090 ; RV32-NEXT: add a0, sp, a0
1091 ; RV32-NEXT: addi a0, a0, 16
1092 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1093 ; RV32-NEXT: vor.vv v8, v16, v8, v0.t
1094 ; RV32-NEXT: csrr a0, vlenb
1095 ; RV32-NEXT: li a1, 24
1096 ; RV32-NEXT: mul a0, a0, a1
1097 ; RV32-NEXT: add sp, sp, a0
1098 ; RV32-NEXT: .cfi_def_cfa sp, 16
1099 ; RV32-NEXT: addi sp, sp, 16
1100 ; RV32-NEXT: .cfi_def_cfa_offset 0
1101 ; RV32-NEXT: ret
1102 ;
1103 ; RV64-LABEL: vp_bswap_nxv7i64:
1104 ; RV64: # %bb.0:
1105 ; RV64-NEXT: addi sp, sp, -16
1106 ; RV64-NEXT: .cfi_def_cfa_offset 16
1107 ; RV64-NEXT: csrr a1, vlenb
1108 ; RV64-NEXT: slli a1, a1, 3
1109 ; RV64-NEXT: sub sp, sp, a1
1110 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
1111 ; RV64-NEXT: lui a1, 4080
1112 ; RV64-NEXT: li a2, 255
1113 ; RV64-NEXT: li a3, 56
1114 ; RV64-NEXT: lui a4, 16
1115 ; RV64-NEXT: li a5, 40
1116 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1117 ; RV64-NEXT: vand.vx v16, v8, a1, v0.t
1118 ; RV64-NEXT: slli a2, a2, 24
1119 ; RV64-NEXT: addiw a0, a4, -256
1120 ; RV64-NEXT: vsll.vi v16, v16, 24, v0.t
1121 ; RV64-NEXT: vand.vx v24, v8, a2, v0.t
1122 ; RV64-NEXT: vsll.vi v24, v24, 8, v0.t
1123 ; RV64-NEXT: vor.vv v16, v16, v24, v0.t
1124 ; RV64-NEXT: addi a4, sp, 16
1125 ; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
1126 ; RV64-NEXT: vsll.vx v24, v8, a3, v0.t
1127 ; RV64-NEXT: vand.vx v16, v8, a0, v0.t
1128 ; RV64-NEXT: vsll.vx v16, v16, a5, v0.t
1129 ; RV64-NEXT: vor.vv v16, v24, v16, v0.t
1130 ; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
1131 ; RV64-NEXT: vor.vv v16, v16, v24, v0.t
1132 ; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
1133 ; RV64-NEXT: vsrl.vx v24, v8, a3, v0.t
1134 ; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t
1135 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t
1136 ; RV64-NEXT: vor.vv v24, v16, v24, v0.t
1137 ; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t
1138 ; RV64-NEXT: vand.vx v16, v16, a1, v0.t
1139 ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
1140 ; RV64-NEXT: vand.vx v8, v8, a2, v0.t
1141 ; RV64-NEXT: vor.vv v8, v8, v16, v0.t
1142 ; RV64-NEXT: vor.vv v8, v8, v24, v0.t
1143 ; RV64-NEXT: addi a0, sp, 16
1144 ; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1145 ; RV64-NEXT: vor.vv v8, v16, v8, v0.t
1146 ; RV64-NEXT: csrr a0, vlenb
1147 ; RV64-NEXT: slli a0, a0, 3
1148 ; RV64-NEXT: add sp, sp, a0
1149 ; RV64-NEXT: .cfi_def_cfa sp, 16
1150 ; RV64-NEXT: addi sp, sp, 16
1151 ; RV64-NEXT: .cfi_def_cfa_offset 0
1152 ; RV64-NEXT: ret
1153 ;
1154 ; CHECK-ZVKB-LABEL: vp_bswap_nxv7i64:
1155 ; CHECK-ZVKB: # %bb.0:
1156 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1157 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
1158 ; CHECK-ZVKB-NEXT: ret
1159 %v = call <vscale x 7 x i64> @llvm.vp.bswap.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl)
1160 ret <vscale x 7 x i64> %v
1161 }
1163 define <vscale x 7 x i64> @vp_bswap_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
1164 ; RV32-LABEL: vp_bswap_nxv7i64_unmasked:
1165 ; RV32: # %bb.0:
1166 ; RV32-NEXT: addi sp, sp, -16
1167 ; RV32-NEXT: .cfi_def_cfa_offset 16
1168 ; RV32-NEXT: csrr a1, vlenb
1169 ; RV32-NEXT: slli a1, a1, 4
1170 ; RV32-NEXT: sub sp, sp, a1
1171 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
1172 ; RV32-NEXT: lui a1, 1044480
1173 ; RV32-NEXT: li a2, 56
1174 ; RV32-NEXT: lui a3, 16
1175 ; RV32-NEXT: li a4, 40
1176 ; RV32-NEXT: lui a5, 4080
1177 ; RV32-NEXT: addi a6, sp, 8
1178 ; RV32-NEXT: sw a1, 8(sp)
1179 ; RV32-NEXT: sw zero, 12(sp)
1180 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1181 ; RV32-NEXT: vsll.vx v24, v8, a2
1182 ; RV32-NEXT: addi a0, a3, -256
1183 ; RV32-NEXT: vsrl.vx v16, v8, a2
1184 ; RV32-NEXT: vsrl.vx v0, v8, a4
1185 ; RV32-NEXT: vand.vx v0, v0, a0
1186 ; RV32-NEXT: vor.vv v16, v0, v16
1187 ; RV32-NEXT: csrr a1, vlenb
1188 ; RV32-NEXT: slli a1, a1, 3
1189 ; RV32-NEXT: add a1, sp, a1
1190 ; RV32-NEXT: addi a1, a1, 16
1191 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
1192 ; RV32-NEXT: vand.vx v0, v8, a0
1193 ; RV32-NEXT: vsll.vx v0, v0, a4
1194 ; RV32-NEXT: vor.vv v16, v24, v0
1195 ; RV32-NEXT: addi a0, sp, 16
1196 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
1197 ; RV32-NEXT: vlse64.v v0, (a6), zero
1198 ; RV32-NEXT: vsrl.vi v16, v8, 24
1199 ; RV32-NEXT: vand.vx v16, v16, a5
1200 ; RV32-NEXT: vsrl.vi v24, v8, 8
1201 ; RV32-NEXT: vand.vv v24, v24, v0
1202 ; RV32-NEXT: vor.vv v16, v24, v16
1203 ; RV32-NEXT: vand.vv v24, v8, v0
1204 ; RV32-NEXT: vand.vx v8, v8, a5
1205 ; RV32-NEXT: vsll.vi v8, v8, 24
1206 ; RV32-NEXT: vsll.vi v24, v24, 8
1207 ; RV32-NEXT: vor.vv v8, v8, v24
1208 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
1209 ; RV32-NEXT: vor.vv v8, v24, v8
1210 ; RV32-NEXT: csrr a0, vlenb
1211 ; RV32-NEXT: slli a0, a0, 3
1212 ; RV32-NEXT: add a0, sp, a0
1213 ; RV32-NEXT: addi a0, a0, 16
1214 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
1215 ; RV32-NEXT: vor.vv v16, v16, v24
1216 ; RV32-NEXT: vor.vv v8, v8, v16
1217 ; RV32-NEXT: csrr a0, vlenb
1218 ; RV32-NEXT: slli a0, a0, 4
1219 ; RV32-NEXT: add sp, sp, a0
1220 ; RV32-NEXT: .cfi_def_cfa sp, 16
1221 ; RV32-NEXT: addi sp, sp, 16
1222 ; RV32-NEXT: .cfi_def_cfa_offset 0
1223 ; RV32-NEXT: ret
1224 ;
1225 ; RV64-LABEL: vp_bswap_nxv7i64_unmasked:
1226 ; RV64: # %bb.0:
1227 ; RV64-NEXT: addi sp, sp, -16
1228 ; RV64-NEXT: .cfi_def_cfa_offset 16
1229 ; RV64-NEXT: csrr a1, vlenb
1230 ; RV64-NEXT: slli a1, a1, 3
1231 ; RV64-NEXT: sub sp, sp, a1
1232 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
1233 ; RV64-NEXT: lui a1, 4080
1234 ; RV64-NEXT: li a2, 255
1235 ; RV64-NEXT: li a3, 56
1236 ; RV64-NEXT: lui a4, 16
1237 ; RV64-NEXT: li a5, 40
1238 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1239 ; RV64-NEXT: vsrl.vi v24, v8, 24
1240 ; RV64-NEXT: addiw a0, a4, -256
1241 ; RV64-NEXT: vsrl.vx v16, v8, a3
1242 ; RV64-NEXT: vsrl.vx v0, v8, a5
1243 ; RV64-NEXT: vand.vx v0, v0, a0
1244 ; RV64-NEXT: vor.vv v16, v0, v16
1245 ; RV64-NEXT: addi a4, sp, 16
1246 ; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
1247 ; RV64-NEXT: vsrl.vi v0, v8, 8
1248 ; RV64-NEXT: slli a2, a2, 24
1249 ; RV64-NEXT: vand.vx v24, v24, a1
1250 ; RV64-NEXT: vand.vx v0, v0, a2
1251 ; RV64-NEXT: vor.vv v24, v0, v24
1252 ; RV64-NEXT: vand.vx v0, v8, a1
1253 ; RV64-NEXT: vsll.vi v0, v0, 24
1254 ; RV64-NEXT: vand.vx v16, v8, a2
1255 ; RV64-NEXT: vsll.vi v16, v16, 8
1256 ; RV64-NEXT: vor.vv v16, v0, v16
1257 ; RV64-NEXT: vsll.vx v0, v8, a3
1258 ; RV64-NEXT: vand.vx v8, v8, a0
1259 ; RV64-NEXT: vsll.vx v8, v8, a5
1260 ; RV64-NEXT: vor.vv v8, v0, v8
1261 ; RV64-NEXT: vor.vv v8, v8, v16
1262 ; RV64-NEXT: addi a0, sp, 16
1263 ; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1264 ; RV64-NEXT: vor.vv v16, v24, v16
1265 ; RV64-NEXT: vor.vv v8, v8, v16
1266 ; RV64-NEXT: csrr a0, vlenb
1267 ; RV64-NEXT: slli a0, a0, 3
1268 ; RV64-NEXT: add sp, sp, a0
1269 ; RV64-NEXT: .cfi_def_cfa sp, 16
1270 ; RV64-NEXT: addi sp, sp, 16
1271 ; RV64-NEXT: .cfi_def_cfa_offset 0
1272 ; RV64-NEXT: ret
1273 ;
1274 ; CHECK-ZVKB-LABEL: vp_bswap_nxv7i64_unmasked:
1275 ; CHECK-ZVKB: # %bb.0:
1276 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1277 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8
1278 ; CHECK-ZVKB-NEXT: ret
1279 %v = call <vscale x 7 x i64> @llvm.vp.bswap.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl)
1280 ret <vscale x 7 x i64> %v
1281 }
1283 declare <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
1285 define <vscale x 8 x i64> @vp_bswap_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1286 ; RV32-LABEL: vp_bswap_nxv8i64:
1287 ; RV32: # %bb.0:
1288 ; RV32-NEXT: addi sp, sp, -16
1289 ; RV32-NEXT: .cfi_def_cfa_offset 16
1290 ; RV32-NEXT: csrr a1, vlenb
1291 ; RV32-NEXT: li a2, 24
1292 ; RV32-NEXT: mul a1, a1, a2
1293 ; RV32-NEXT: sub sp, sp, a1
1294 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
1295 ; RV32-NEXT: lui a1, 1044480
1296 ; RV32-NEXT: li a2, 56
1297 ; RV32-NEXT: lui a3, 16
1298 ; RV32-NEXT: li a4, 40
1299 ; RV32-NEXT: addi a5, sp, 8
1300 ; RV32-NEXT: sw a1, 8(sp)
1301 ; RV32-NEXT: sw zero, 12(sp)
1302 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1303 ; RV32-NEXT: vsll.vx v16, v8, a2, v0.t
1304 ; RV32-NEXT: addi a0, a3, -256
1305 ; RV32-NEXT: vand.vx v24, v8, a0, v0.t
1306 ; RV32-NEXT: vsll.vx v24, v24, a4, v0.t
1307 ; RV32-NEXT: vor.vv v16, v16, v24, v0.t
1308 ; RV32-NEXT: csrr a1, vlenb
1309 ; RV32-NEXT: slli a1, a1, 4
1310 ; RV32-NEXT: add a1, sp, a1
1311 ; RV32-NEXT: addi a1, a1, 16
1312 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
1313 ; RV32-NEXT: vlse64.v v16, (a5), zero
1314 ; RV32-NEXT: csrr a1, vlenb
1315 ; RV32-NEXT: slli a1, a1, 3
1316 ; RV32-NEXT: add a1, sp, a1
1317 ; RV32-NEXT: addi a1, a1, 16
1318 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
1319 ; RV32-NEXT: lui a1, 4080
1320 ; RV32-NEXT: vand.vx v24, v8, a1, v0.t
1321 ; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
1322 ; RV32-NEXT: addi a3, sp, 16
1323 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
1324 ; RV32-NEXT: vand.vv v24, v8, v16, v0.t
1325 ; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
1326 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
1327 ; RV32-NEXT: vor.vv v16, v24, v16, v0.t
1328 ; RV32-NEXT: csrr a3, vlenb
1329 ; RV32-NEXT: slli a3, a3, 4
1330 ; RV32-NEXT: add a3, sp, a3
1331 ; RV32-NEXT: addi a3, a3, 16
1332 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
1333 ; RV32-NEXT: vor.vv v16, v24, v16, v0.t
1334 ; RV32-NEXT: csrr a3, vlenb
1335 ; RV32-NEXT: slli a3, a3, 4
1336 ; RV32-NEXT: add a3, sp, a3
1337 ; RV32-NEXT: addi a3, a3, 16
1338 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
1339 ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t
1340 ; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t
1341 ; RV32-NEXT: vand.vx v24, v24, a0, v0.t
1342 ; RV32-NEXT: vor.vv v16, v24, v16, v0.t
1343 ; RV32-NEXT: addi a0, sp, 16
1344 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
1345 ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
1346 ; RV32-NEXT: vand.vx v24, v24, a1, v0.t
1347 ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
1348 ; RV32-NEXT: csrr a0, vlenb
1349 ; RV32-NEXT: slli a0, a0, 3
1350 ; RV32-NEXT: add a0, sp, a0
1351 ; RV32-NEXT: addi a0, a0, 16
1352 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1353 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
1354 ; RV32-NEXT: vor.vv v8, v8, v24, v0.t
1355 ; RV32-NEXT: addi a0, sp, 16
1356 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1357 ; RV32-NEXT: vor.vv v8, v8, v16, v0.t
1358 ; RV32-NEXT: csrr a0, vlenb
1359 ; RV32-NEXT: slli a0, a0, 4
1360 ; RV32-NEXT: add a0, sp, a0
1361 ; RV32-NEXT: addi a0, a0, 16
1362 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1363 ; RV32-NEXT: vor.vv v8, v16, v8, v0.t
1364 ; RV32-NEXT: csrr a0, vlenb
1365 ; RV32-NEXT: li a1, 24
1366 ; RV32-NEXT: mul a0, a0, a1
1367 ; RV32-NEXT: add sp, sp, a0
1368 ; RV32-NEXT: .cfi_def_cfa sp, 16
1369 ; RV32-NEXT: addi sp, sp, 16
1370 ; RV32-NEXT: .cfi_def_cfa_offset 0
1371 ; RV32-NEXT: ret
1372 ;
1373 ; RV64-LABEL: vp_bswap_nxv8i64:
1374 ; RV64: # %bb.0:
1375 ; RV64-NEXT: addi sp, sp, -16
1376 ; RV64-NEXT: .cfi_def_cfa_offset 16
1377 ; RV64-NEXT: csrr a1, vlenb
1378 ; RV64-NEXT: slli a1, a1, 3
1379 ; RV64-NEXT: sub sp, sp, a1
1380 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
1381 ; RV64-NEXT: lui a1, 4080
1382 ; RV64-NEXT: li a2, 255
1383 ; RV64-NEXT: li a3, 56
1384 ; RV64-NEXT: lui a4, 16
1385 ; RV64-NEXT: li a5, 40
1386 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1387 ; RV64-NEXT: vand.vx v16, v8, a1, v0.t
1388 ; RV64-NEXT: slli a2, a2, 24
1389 ; RV64-NEXT: addiw a0, a4, -256
1390 ; RV64-NEXT: vsll.vi v16, v16, 24, v0.t
1391 ; RV64-NEXT: vand.vx v24, v8, a2, v0.t
1392 ; RV64-NEXT: vsll.vi v24, v24, 8, v0.t
1393 ; RV64-NEXT: vor.vv v16, v16, v24, v0.t
1394 ; RV64-NEXT: addi a4, sp, 16
1395 ; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
1396 ; RV64-NEXT: vsll.vx v24, v8, a3, v0.t
1397 ; RV64-NEXT: vand.vx v16, v8, a0, v0.t
1398 ; RV64-NEXT: vsll.vx v16, v16, a5, v0.t
1399 ; RV64-NEXT: vor.vv v16, v24, v16, v0.t
1400 ; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
1401 ; RV64-NEXT: vor.vv v16, v16, v24, v0.t
1402 ; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
1403 ; RV64-NEXT: vsrl.vx v24, v8, a3, v0.t
1404 ; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t
1405 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t
1406 ; RV64-NEXT: vor.vv v24, v16, v24, v0.t
1407 ; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t
1408 ; RV64-NEXT: vand.vx v16, v16, a1, v0.t
1409 ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
1410 ; RV64-NEXT: vand.vx v8, v8, a2, v0.t
1411 ; RV64-NEXT: vor.vv v8, v8, v16, v0.t
1412 ; RV64-NEXT: vor.vv v8, v8, v24, v0.t
1413 ; RV64-NEXT: addi a0, sp, 16
1414 ; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1415 ; RV64-NEXT: vor.vv v8, v16, v8, v0.t
1416 ; RV64-NEXT: csrr a0, vlenb
1417 ; RV64-NEXT: slli a0, a0, 3
1418 ; RV64-NEXT: add sp, sp, a0
1419 ; RV64-NEXT: .cfi_def_cfa sp, 16
1420 ; RV64-NEXT: addi sp, sp, 16
1421 ; RV64-NEXT: .cfi_def_cfa_offset 0
1422 ; RV64-NEXT: ret
1423 ;
1424 ; CHECK-ZVKB-LABEL: vp_bswap_nxv8i64:
1425 ; CHECK-ZVKB: # %bb.0:
1426 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1427 ; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
1428 ; CHECK-ZVKB-NEXT: ret
1429 %v = call <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
1430 ret <vscale x 8 x i64> %v
1431 }
1433 define <vscale x 8 x i64> @vp_bswap_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
1434 ; RV32-LABEL: vp_bswap_nxv8i64_unmasked:
1435 ; RV32: # %bb.0:
1436 ; RV32-NEXT: addi sp, sp, -16
1437 ; RV32-NEXT: .cfi_def_cfa_offset 16
1438 ; RV32-NEXT: csrr a1, vlenb
1439 ; RV32-NEXT: slli a1, a1, 4
1440 ; RV32-NEXT: sub sp, sp, a1
1441 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
1442 ; RV32-NEXT: lui a1, 1044480
1443 ; RV32-NEXT: li a2, 56
1444 ; RV32-NEXT: lui a3, 16
1445 ; RV32-NEXT: li a4, 40
1446 ; RV32-NEXT: lui a5, 4080
1447 ; RV32-NEXT: addi a6, sp, 8
1448 ; RV32-NEXT: sw a1, 8(sp)
1449 ; RV32-NEXT: sw zero, 12(sp)
1450 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1451 ; RV32-NEXT: vsll.vx v24, v8, a2
1452 ; RV32-NEXT: addi a0, a3, -256
1453 ; RV32-NEXT: vsrl.vx v16, v8, a2
1454 ; RV32-NEXT: vsrl.vx v0, v8, a4
1455 ; RV32-NEXT: vand.vx v0, v0, a0
1456 ; RV32-NEXT: vor.vv v16, v0, v16
1457 ; RV32-NEXT: csrr a1, vlenb
1458 ; RV32-NEXT: slli a1, a1, 3
1459 ; RV32-NEXT: add a1, sp, a1
1460 ; RV32-NEXT: addi a1, a1, 16
1461 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
1462 ; RV32-NEXT: vand.vx v0, v8, a0
1463 ; RV32-NEXT: vsll.vx v0, v0, a4
1464 ; RV32-NEXT: vor.vv v16, v24, v0
1465 ; RV32-NEXT: addi a0, sp, 16
1466 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
1467 ; RV32-NEXT: vlse64.v v0, (a6), zero
1468 ; RV32-NEXT: vsrl.vi v16, v8, 24
1469 ; RV32-NEXT: vand.vx v16, v16, a5
1470 ; RV32-NEXT: vsrl.vi v24, v8, 8
1471 ; RV32-NEXT: vand.vv v24, v24, v0
1472 ; RV32-NEXT: vor.vv v16, v24, v16
1473 ; RV32-NEXT: vand.vv v24, v8, v0
1474 ; RV32-NEXT: vand.vx v8, v8, a5
1475 ; RV32-NEXT: vsll.vi v8, v8, 24
1476 ; RV32-NEXT: vsll.vi v24, v24, 8
1477 ; RV32-NEXT: vor.vv v8, v8, v24
1478 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
1479 ; RV32-NEXT: vor.vv v8, v24, v8
1480 ; RV32-NEXT: csrr a0, vlenb
1481 ; RV32-NEXT: slli a0, a0, 3
1482 ; RV32-NEXT: add a0, sp, a0
1483 ; RV32-NEXT: addi a0, a0, 16
1484 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
1485 ; RV32-NEXT: vor.vv v16, v16, v24
1486 ; RV32-NEXT: vor.vv v8, v8, v16
1487 ; RV32-NEXT: csrr a0, vlenb
1488 ; RV32-NEXT: slli a0, a0, 4
1489 ; RV32-NEXT: add sp, sp, a0
1490 ; RV32-NEXT: .cfi_def_cfa sp, 16
1491 ; RV32-NEXT: addi sp, sp, 16
1492 ; RV32-NEXT: .cfi_def_cfa_offset 0
; RV64-LABEL: vp_bswap_nxv8i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT: lui a1, 4080
; RV64-NEXT: li a2, 255
; RV64-NEXT: li a3, 56
; RV64-NEXT: lui a4, 16
; RV64-NEXT: li a5, 40
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 24
; RV64-NEXT: addiw a0, a4, -256
; RV64-NEXT: vsrl.vx v16, v8, a3
; RV64-NEXT: vsrl.vx v0, v8, a5
; RV64-NEXT: vand.vx v0, v0, a0
; RV64-NEXT: vor.vv v16, v0, v16
; RV64-NEXT: addi a4, sp, 16
; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV64-NEXT: vsrl.vi v0, v8, 8
; RV64-NEXT: slli a2, a2, 24
; RV64-NEXT: vand.vx v24, v24, a1
; RV64-NEXT: vand.vx v0, v0, a2
; RV64-NEXT: vor.vv v24, v0, v24
; RV64-NEXT: vand.vx v0, v8, a1
; RV64-NEXT: vsll.vi v0, v0, 24
; RV64-NEXT: vand.vx v16, v8, a2
; RV64-NEXT: vsll.vi v16, v16, 8
; RV64-NEXT: vor.vv v16, v0, v16
; RV64-NEXT: vsll.vx v0, v8, a3
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vsll.vx v8, v8, a5
; RV64-NEXT: vor.vv v8, v0, v8
; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vor.vv v16, v24, v16
; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv8i64_unmasked:
; CHECK-ZVKB: # %bb.0:
; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVKB-NEXT: vrev8.v v8, v8
; CHECK-ZVKB-NEXT: ret
%v = call <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret <vscale x 8 x i64> %v
}

; Test splitting. Use the i16 version for easier checking.
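; Commentary (not part of the autogenerated checks): nxv64i16 does not fit in a
; single m8 register group, so the VP op is split in two. Judging from the check
; lines below, the high half appears to run with a saturating EVL of evl - VLMAX
; (csrr/slli gives a1 = vlenb * 4 = VLMAX for e16/m8, and the sltu/addi/and
; sequence clamps the subtraction at zero), while the low half clamps evl to
; VLMAX via the bltu/mv pair. Roughly:
;   hi_evl = evl > VLMAX ? evl - VLMAX : 0
;   lo_evl = evl > VLMAX ? VLMAX : evl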
declare <vscale x 64 x i16> @llvm.vp.bswap.nxv64i16(<vscale x 64 x i16>, <vscale x 64 x i1>, i32)

define <vscale x 64 x i16> @vp_bswap_nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_nxv64i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 1
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v8, v16, 8, v0.t
; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a0, a1, .LBB32_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv64i16:
; CHECK-ZVKB: # %bb.0:
; CHECK-ZVKB-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-ZVKB-NEXT: vmv1r.v v24, v0
; CHECK-ZVKB-NEXT: csrr a1, vlenb
; CHECK-ZVKB-NEXT: srli a2, a1, 1
; CHECK-ZVKB-NEXT: slli a1, a1, 2
; CHECK-ZVKB-NEXT: vslidedown.vx v0, v0, a2
; CHECK-ZVKB-NEXT: sub a2, a0, a1
; CHECK-ZVKB-NEXT: sltu a3, a0, a2
; CHECK-ZVKB-NEXT: addi a3, a3, -1
; CHECK-ZVKB-NEXT: and a2, a3, a2
; CHECK-ZVKB-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; CHECK-ZVKB-NEXT: vrev8.v v16, v16, v0.t
; CHECK-ZVKB-NEXT: bltu a0, a1, .LBB32_2
; CHECK-ZVKB-NEXT: # %bb.1:
; CHECK-ZVKB-NEXT: mv a0, a1
; CHECK-ZVKB-NEXT: .LBB32_2:
; CHECK-ZVKB-NEXT: vmv1r.v v0, v24
; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
; CHECK-ZVKB-NEXT: ret
%v = call <vscale x 64 x i16> @llvm.vp.bswap.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 %evl)
ret <vscale x 64 x i16> %v
}

define <vscale x 64 x i16> @vp_bswap_nxv64i16_unmasked(<vscale x 64 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_nxv64i16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v24, v16, 8
; CHECK-NEXT: vsll.vi v16, v16, 8
; CHECK-NEXT: vor.vv v16, v16, v24
; CHECK-NEXT: bltu a0, a1, .LBB33_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB33_2:
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v24, v8, 8
; CHECK-NEXT: vsll.vi v8, v8, 8
; CHECK-NEXT: vor.vv v8, v8, v24
; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv64i16_unmasked:
; CHECK-ZVKB: # %bb.0:
; CHECK-ZVKB-NEXT: csrr a1, vlenb
; CHECK-ZVKB-NEXT: slli a1, a1, 2
; CHECK-ZVKB-NEXT: sub a2, a0, a1
; CHECK-ZVKB-NEXT: sltu a3, a0, a2
; CHECK-ZVKB-NEXT: addi a3, a3, -1
; CHECK-ZVKB-NEXT: and a2, a3, a2
; CHECK-ZVKB-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; CHECK-ZVKB-NEXT: vrev8.v v16, v16
; CHECK-ZVKB-NEXT: bltu a0, a1, .LBB33_2
; CHECK-ZVKB-NEXT: # %bb.1:
; CHECK-ZVKB-NEXT: mv a0, a1
; CHECK-ZVKB-NEXT: .LBB33_2:
; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-ZVKB-NEXT: vrev8.v v8, v8
; CHECK-ZVKB-NEXT: ret
%v = call <vscale x 64 x i16> @llvm.vp.bswap.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> splat (i1 true), i32 %evl)
ret <vscale x 64 x i16> %v
}

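; Commentary (not part of the autogenerated checks): i48 is not a legal element
; type, so the operation appears to be promoted to i64 and lowered as a full
; 64-bit byte swap followed by a 16-bit logical shift right (the trailing
; vsrl.vi ..., 16 in each check block below), which moves the swapped 48-bit
; value back into the low bits of each element.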
declare <vscale x 1 x i48> @llvm.vp.bswap.nxv1i48(<vscale x 1 x i48>, <vscale x 1 x i1>, i32)
define <vscale x 1 x i48> @vp_bswap_nxv1i48(<vscale x 1 x i48> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bswap_nxv1i48:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: li a2, 56
; RV32-NEXT: lui a3, 16
; RV32-NEXT: li a4, 40
; RV32-NEXT: lui a5, 4080
; RV32-NEXT: addi a6, sp, 8
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v9, v8, a2, v0.t
; RV32-NEXT: addi a0, a3, -256
; RV32-NEXT: vand.vx v10, v8, a0, v0.t
; RV32-NEXT: vlse64.v v11, (a6), zero
; RV32-NEXT: vsll.vx v10, v10, a4, v0.t
; RV32-NEXT: vor.vv v9, v9, v10, v0.t
; RV32-NEXT: vand.vx v10, v8, a5, v0.t
; RV32-NEXT: vsll.vi v10, v10, 24, v0.t
; RV32-NEXT: vand.vv v12, v8, v11, v0.t
; RV32-NEXT: vsll.vi v12, v12, 8, v0.t
; RV32-NEXT: vor.vv v10, v10, v12, v0.t
; RV32-NEXT: vor.vv v9, v9, v10, v0.t
; RV32-NEXT: vsrl.vx v10, v8, a2, v0.t
; RV32-NEXT: vsrl.vx v12, v8, a4, v0.t
; RV32-NEXT: vand.vx v12, v12, a0, v0.t
; RV32-NEXT: vor.vv v10, v12, v10, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t
; RV32-NEXT: vand.vx v12, v12, a5, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT: vand.vv v8, v8, v11, v0.t
; RV32-NEXT: vor.vv v8, v8, v12, v0.t
; RV32-NEXT: vor.vv v8, v8, v10, v0.t
; RV32-NEXT: vor.vv v8, v9, v8, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 16, v0.t
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bswap_nxv1i48:
; RV64: # %bb.0:
; RV64-NEXT: lui a1, 4080
; RV64-NEXT: li a2, 255
; RV64-NEXT: li a3, 56
; RV64-NEXT: lui a4, 16
; RV64-NEXT: li a5, 40
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT: vand.vx v9, v8, a1, v0.t
; RV64-NEXT: slli a2, a2, 24
; RV64-NEXT: addiw a0, a4, -256
; RV64-NEXT: vsll.vi v9, v9, 24, v0.t
; RV64-NEXT: vand.vx v10, v8, a2, v0.t
; RV64-NEXT: vsll.vi v10, v10, 8, v0.t
; RV64-NEXT: vor.vv v9, v9, v10, v0.t
; RV64-NEXT: vsll.vx v10, v8, a3, v0.t
; RV64-NEXT: vand.vx v11, v8, a0, v0.t
; RV64-NEXT: vsll.vx v11, v11, a5, v0.t
; RV64-NEXT: vor.vv v10, v10, v11, v0.t
; RV64-NEXT: vor.vv v9, v10, v9, v0.t
; RV64-NEXT: vsrl.vx v10, v8, a3, v0.t
; RV64-NEXT: vsrl.vx v11, v8, a5, v0.t
; RV64-NEXT: vand.vx v11, v11, a0, v0.t
; RV64-NEXT: vor.vv v10, v11, v10, v0.t
; RV64-NEXT: vsrl.vi v11, v8, 24, v0.t
; RV64-NEXT: vand.vx v11, v11, a1, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vor.vv v8, v8, v11, v0.t
; RV64-NEXT: vor.vv v8, v8, v10, v0.t
; RV64-NEXT: vor.vv v8, v9, v8, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 16, v0.t
; RV64-NEXT: ret
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv1i48:
; CHECK-ZVKB: # %bb.0:
; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
; CHECK-ZVKB-NEXT: vsrl.vi v8, v8, 16, v0.t
; CHECK-ZVKB-NEXT: ret
%v = call <vscale x 1 x i48> @llvm.vp.bswap.nxv1i48(<vscale x 1 x i48> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i48> %v