; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s

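; These tests cover lowering of the llvm.experimental.vp.reverse intrinsic for
; scalable integer vectors. As the checks below show, the common strategy is
; vid.v plus vrsub.vx to build a descending index vector (evl-1 down to 0),
; followed by vrgather.vv (or vrgatherei16.vv for i8 elements) to permute the
; source; masked variants apply the mask (v0.t) to each step.
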
define <vscale x 1 x i64> @test_vp_reverse_nxv1i64_masked(<vscale x 1 x i64> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv1i64_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v9, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %dst = call <vscale x 1 x i64> @llvm.experimental.vp.reverse.nxv1i64(<vscale x 1 x i64> %src, <vscale x 1 x i1> %mask, i32 %evl)
  ret <vscale x 1 x i64> %dst
}

define <vscale x 1 x i64> @test_vp_reverse_nxv1i64(<vscale x 1 x i64> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv1i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vx v10, v9, a1
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret

  %dst = call <vscale x 1 x i64> @llvm.experimental.vp.reverse.nxv1i64(<vscale x 1 x i64> %src, <vscale x 1 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 1 x i64> %dst
}

define <vscale x 2 x i32> @test_vp_reverse_nxv2i32_masked(<vscale x 2 x i32> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2i32_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v9, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %dst = call <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32> %src, <vscale x 2 x i1> %mask, i32 %evl)
  ret <vscale x 2 x i32> %dst
}

define <vscale x 2 x i32> @test_vp_reverse_nxv2i32(<vscale x 2 x i32> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vx v10, v9, a1
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret

  %dst = call <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32> %src, <vscale x 2 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 2 x i32> %dst
}

define <vscale x 4 x i16> @test_vp_reverse_nxv4i16_masked(<vscale x 4 x i16> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i16_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v9, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %dst = call <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16> %src, <vscale x 4 x i1> %mask, i32 %evl)
  ret <vscale x 4 x i16> %dst
}

define <vscale x 4 x i16> @test_vp_reverse_nxv4i16(<vscale x 4 x i16> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vx v10, v9, a1
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret

  %dst = call <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16> %src, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i16> %dst
}

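; For i8 element types the gather indices are computed at e16 (vrgatherei16.vv)
; so that the index range is not limited by the 8-bit element width; the index
; vector therefore occupies twice the register group of the data (e16/m2 for an
; e8/m1 source).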
define <vscale x 8 x i8> @test_vp_reverse_nxv8i8_masked(<vscale x 8 x i8> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i8_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %dst = call <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8> %src, <vscale x 8 x i1> %mask, i32 %evl)
  ret <vscale x 8 x i8> %dst
}

define <vscale x 8 x i8> @test_vp_reverse_nxv8i8(<vscale x 8 x i8> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret

  %dst = call <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 8 x i8> %dst
}

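; The tests below repeat the same pattern for sources occupying larger register
; groups (LMUL = 2, 4, 8); only the register group size of the index vector and
; temporaries changes.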
define <vscale x 2 x i64> @test_vp_reverse_nxv2i64_masked(<vscale x 2 x i64> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2i64_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v12, v10, a0, v0.t
; CHECK-NEXT:    vrgather.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %dst = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> %src, <vscale x 2 x i1> %mask, i32 %evl)
  ret <vscale x 2 x i64> %dst
}

define <vscale x 2 x i64> @test_vp_reverse_nxv2i64(<vscale x 2 x i64> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vx v12, v10, a1
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret

  %dst = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> %src, <vscale x 2 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 2 x i64> %dst
}

define <vscale x 4 x i32> @test_vp_reverse_nxv4i32_masked(<vscale x 4 x i32> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i32_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v12, v10, a0, v0.t
; CHECK-NEXT:    vrgather.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %dst = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> %src, <vscale x 4 x i1> %mask, i32 %evl)
  ret <vscale x 4 x i32> %dst
}

define <vscale x 4 x i32> @test_vp_reverse_nxv4i32(<vscale x 4 x i32> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vx v12, v10, a1
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret

  %dst = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> %src, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i32> %dst
}

define <vscale x 8 x i16> @test_vp_reverse_nxv8i16_masked(<vscale x 8 x i16> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i16_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v12, v10, a0, v0.t
; CHECK-NEXT:    vrgather.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %dst = call <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16> %src, <vscale x 8 x i1> %mask, i32 %evl)
  ret <vscale x 8 x i16> %dst
}

define <vscale x 8 x i16> @test_vp_reverse_nxv8i16(<vscale x 8 x i16> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vx v12, v10, a1
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret

  %dst = call <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 8 x i16> %dst
}

define <vscale x 16 x i8> @test_vp_reverse_nxv16i8_masked(<vscale x 16 x i8> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i8_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vid.v v12, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %dst = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> %src, <vscale x 16 x i1> %mask, i32 %evl)
  ret <vscale x 16 x i8> %dst
}

define <vscale x 16 x i8> @test_vp_reverse_nxv16i8(<vscale x 16 x i8> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret

  %dst = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> %src, <vscale x 16 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 16 x i8> %dst
}

define <vscale x 4 x i64> @test_vp_reverse_nxv4i64_masked(<vscale x 4 x i64> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i64_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vid.v v12, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v16, v12, a0, v0.t
; CHECK-NEXT:    vrgather.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %dst = call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> %src, <vscale x 4 x i1> %mask, i32 %evl)
  ret <vscale x 4 x i64> %dst
}

define <vscale x 4 x i64> @test_vp_reverse_nxv4i64(<vscale x 4 x i64> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    vrsub.vx v16, v12, a1
; CHECK-NEXT:    vrgather.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret

  %dst = call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> %src, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i64> %dst
}

define <vscale x 8 x i32> @test_vp_reverse_nxv8i32_masked(<vscale x 8 x i32> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i32_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vid.v v12, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v16, v12, a0, v0.t
; CHECK-NEXT:    vrgather.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %dst = call <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32> %src, <vscale x 8 x i1> %mask, i32 %evl)
  ret <vscale x 8 x i32> %dst
}

define <vscale x 8 x i32> @test_vp_reverse_nxv8i32(<vscale x 8 x i32> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    vrsub.vx v16, v12, a1
; CHECK-NEXT:    vrgather.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret

  %dst = call <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 8 x i32> %dst
}

define <vscale x 16 x i16> @test_vp_reverse_nxv16i16_masked(<vscale x 16 x i16> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i16_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vid.v v12, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v16, v12, a0, v0.t
; CHECK-NEXT:    vrgather.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %dst = call <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16> %src, <vscale x 16 x i1> %mask, i32 %evl)
  ret <vscale x 16 x i16> %dst
}

define <vscale x 16 x i16> @test_vp_reverse_nxv16i16(<vscale x 16 x i16> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    vrsub.vx v16, v12, a1
; CHECK-NEXT:    vrgather.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret

  %dst = call <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16> %src, <vscale x 16 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 16 x i16> %dst
}

define <vscale x 32 x i8> @test_vp_reverse_nxv32i8_masked(<vscale x 32 x i8> %src, <vscale x 32 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv32i8_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vid.v v16, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v16, v16, a0, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %dst = call <vscale x 32 x i8> @llvm.experimental.vp.reverse.nxv32i8(<vscale x 32 x i8> %src, <vscale x 32 x i1> %mask, i32 %evl)
  ret <vscale x 32 x i8> %dst
}

define <vscale x 32 x i8> @test_vp_reverse_nxv32i8(<vscale x 32 x i8> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vrsub.vx v16, v16, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret

  %dst = call <vscale x 32 x i8> @llvm.experimental.vp.reverse.nxv32i8(<vscale x 32 x i8> %src, <vscale x 32 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 32 x i8> %dst
}

define <vscale x 8 x i64> @test_vp_reverse_nxv8i64_masked(<vscale x 8 x i64> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i64_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vid.v v16, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v24, v16, a0, v0.t
; CHECK-NEXT:    vrgather.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %dst = call <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64> %src, <vscale x 8 x i1> %mask, i32 %evl)
  ret <vscale x 8 x i64> %dst
}

define <vscale x 8 x i64> @test_vp_reverse_nxv8i64(<vscale x 8 x i64> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vrsub.vx v24, v16, a1
; CHECK-NEXT:    vrgather.vv v16, v8, v24
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret

  %dst = call <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 8 x i64> %dst
}

define <vscale x 16 x i32> @test_vp_reverse_nxv16i32_masked(<vscale x 16 x i32> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i32_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vid.v v16, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v24, v16, a0, v0.t
; CHECK-NEXT:    vrgather.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %dst = call <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32> %src, <vscale x 16 x i1> %mask, i32 %evl)
  ret <vscale x 16 x i32> %dst
}

define <vscale x 16 x i32> @test_vp_reverse_nxv16i32(<vscale x 16 x i32> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vrsub.vx v24, v16, a1
; CHECK-NEXT:    vrgather.vv v16, v8, v24
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret

  %dst = call <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32> %src, <vscale x 16 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 16 x i32> %dst
}

define <vscale x 32 x i16> @test_vp_reverse_nxv32i16_masked(<vscale x 32 x i16> %src, <vscale x 32 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv32i16_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vid.v v16, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v24, v16, a0, v0.t
; CHECK-NEXT:    vrgather.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %dst = call <vscale x 32 x i16> @llvm.experimental.vp.reverse.nxv32i16(<vscale x 32 x i16> %src, <vscale x 32 x i1> %mask, i32 %evl)
  ret <vscale x 32 x i16> %dst
}

define <vscale x 32 x i16> @test_vp_reverse_nxv32i16(<vscale x 32 x i16> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vrsub.vx v24, v16, a1
; CHECK-NEXT:    vrgather.vv v16, v8, v24
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret

  %dst = call <vscale x 32 x i16> @llvm.experimental.vp.reverse.nxv32i16(<vscale x 32 x i16> %src, <vscale x 32 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 32 x i16> %dst
}

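; For nxv64i8 (an e8/m8 source) the e16 index vector would need LMUL=16, which
; does not exist, so the lowering reverses the whole register group at VLMAX in
; two m4 halves and then uses vslidedown.vx by (VLMAX - EVL) to line the result
; up with the requested element count; in the masked variant the mask is only
; applied to the final slide.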
define <vscale x 64 x i8> @test_vp_reverse_nxv64i8_masked(<vscale x 64 x i8> %src, <vscale x 64 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv64i8_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a2, a1, 2
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    vsetvli a3, zero, e16, m8, ta, ma
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vrsub.vx v16, v16, a2
; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v28, v8, v16
; CHECK-NEXT:    vrgatherei16.vv v24, v12, v16
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub a1, a1, a0
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v24, a1, v0.t
; CHECK-NEXT:    ret
  %dst = call <vscale x 64 x i8> @llvm.experimental.vp.reverse.nxv64i8(<vscale x 64 x i8> %src, <vscale x 64 x i1> %mask, i32 %evl)
  ret <vscale x 64 x i8> %dst
}

define <vscale x 64 x i8> @test_vp_reverse_nxv64i8(<vscale x 64 x i8> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a2, a1, 2
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    vsetvli a3, zero, e16, m8, ta, ma
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vrsub.vx v16, v16, a2
; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v28, v8, v16
; CHECK-NEXT:    vrgatherei16.vv v24, v12, v16
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub a1, a1, a0
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v24, a1
; CHECK-NEXT:    ret

  %dst = call <vscale x 64 x i8> @llvm.experimental.vp.reverse.nxv64i8(<vscale x 64 x i8> %src, <vscale x 64 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 64 x i8> %dst
}

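; nxv128i8 spans two full m8 register groups, so it is reversed through memory:
; each half is stored to a stack temporary with a unit negative stride
; (vsse8.v with stride -1) and the reversed result is then reloaded
; contiguously with vle8.v; the EVL is split between the two halves.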
define <vscale x 128 x i8> @test_vp_reverse_nxv128i8(<vscale x 128 x i8> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv128i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    mv a2, a0
; CHECK-NEXT:    bltu a0, a1, .LBB32_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a2, a1
; CHECK-NEXT:  .LBB32_2:
; CHECK-NEXT:    addi sp, sp, -80
; CHECK-NEXT:    .cfi_def_cfa_offset 80
; CHECK-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; CHECK-NEXT:    .cfi_offset ra, -8
; CHECK-NEXT:    .cfi_offset s0, -16
; CHECK-NEXT:    addi s0, sp, 80
; CHECK-NEXT:    .cfi_def_cfa s0, 0
; CHECK-NEXT:    csrr a3, vlenb
; CHECK-NEXT:    slli a3, a3, 4
; CHECK-NEXT:    sub sp, sp, a3
; CHECK-NEXT:    andi sp, sp, -64
; CHECK-NEXT:    addi a3, sp, 64
; CHECK-NEXT:    add a4, a0, a3
; CHECK-NEXT:    addi a4, a4, -1
; CHECK-NEXT:    li a5, -1
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vsse8.v v8, (a4), a5
; CHECK-NEXT:    sub a4, a4, a2
; CHECK-NEXT:    sub a6, a0, a1
; CHECK-NEXT:    sltu a0, a0, a6
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    and a0, a0, a6
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vsse8.v v16, (a4), a5
; CHECK-NEXT:    add a1, a3, a1
; CHECK-NEXT:    vle8.v v16, (a1)
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a3)
; CHECK-NEXT:    addi sp, s0, -80
; CHECK-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 80
; CHECK-NEXT:    ret

  %dst = call <vscale x 128 x i8> @llvm.experimental.vp.reverse.nxv128i8(<vscale x 128 x i8> %src, <vscale x 128 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 128 x i8> %dst
}

declare <vscale x 1 x i64> @llvm.experimental.vp.reverse.nxv1i64(<vscale x 1 x i64>,<vscale x 1 x i1>,i32)
declare <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32>,<vscale x 2 x i1>,i32)
declare <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16>,<vscale x 4 x i1>,i32)
declare <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8>,<vscale x 8 x i1>,i32)

declare <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i1>,i32)
declare <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i1>,i32)
declare <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i1>,i32)
declare <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i1>,i32)

declare <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64>,<vscale x 4 x i1>,i32)
declare <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32>,<vscale x 8 x i1>,i32)
declare <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i1>,i32)
declare <vscale x 32 x i8> @llvm.experimental.vp.reverse.nxv32i8(<vscale x 32 x i8>,<vscale x 32 x i1>,i32)

declare <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64>,<vscale x 8 x i1>,i32)
declare <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32>,<vscale x 16 x i1>,i32)
declare <vscale x 32 x i16> @llvm.experimental.vp.reverse.nxv32i16(<vscale x 32 x i16>,<vscale x 32 x i1>,i32)
declare <vscale x 64 x i8> @llvm.experimental.vp.reverse.nxv64i8(<vscale x 64 x i8>,<vscale x 64 x i1>,i32)

declare <vscale x 128 x i8> @llvm.experimental.vp.reverse.nxv128i8(<vscale x 128 x i8>,<vscale x 128 x i1>,i32)