; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVBB,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVBB,RV64
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVBB,RV32-ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVBB,RV64-ZVBB

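; Reverses of <N x i1> masks. Without Zvbb the mask is expanded into an i8
; vector, reversed with slides or a vrgather, and compared back into a mask;
; with Zvbb the mask register is bit-reversed directly with vbrev.v (plus a
; right shift when N is smaller than the SEW used for the bit-reverse).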
define <2 x i1> @reverse_v2i1(<2 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v2i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v8, 0
; NO-ZVBB-NEXT:    vmerge.vim v8, v8, 1, v0
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmsne.vi v0, v9, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; ZVBB-NEXT:    vbrev.v v8, v0
; ZVBB-NEXT:    vsrl.vi v0, v8, 6
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i1> %res
}

define <4 x i1> @reverse_v4i1(<4 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v4i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v8, 0
; NO-ZVBB-NEXT:    vmerge.vim v8, v8, 1, v0
; NO-ZVBB-NEXT:    vid.v v9
; NO-ZVBB-NEXT:    vrsub.vi v9, v9, 3
; NO-ZVBB-NEXT:    vrgather.vv v10, v8, v9
; NO-ZVBB-NEXT:    vmsne.vi v0, v10, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; ZVBB-NEXT:    vbrev.v v8, v0
; ZVBB-NEXT:    vsrl.vi v0, v8, 4
; ZVBB-NEXT:    ret
  %res = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i1> %res
}

define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v8i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v8, 0
; NO-ZVBB-NEXT:    vmerge.vim v8, v8, 1, v0
; NO-ZVBB-NEXT:    vid.v v9
; NO-ZVBB-NEXT:    vrsub.vi v9, v9, 7
; NO-ZVBB-NEXT:    vrgather.vv v10, v8, v9
; NO-ZVBB-NEXT:    vmsne.vi v0, v10, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v8i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; ZVBB-NEXT:    vbrev.v v0, v0
; ZVBB-NEXT:    ret
  %res = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i1> %res
}

define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v16i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v8, 0
; NO-ZVBB-NEXT:    vmerge.vim v8, v8, 1, v0
; NO-ZVBB-NEXT:    vid.v v9
; NO-ZVBB-NEXT:    vrsub.vi v9, v9, 15
; NO-ZVBB-NEXT:    vrgather.vv v10, v8, v9
; NO-ZVBB-NEXT:    vmsne.vi v0, v10, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v16i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; ZVBB-NEXT:    vbrev.v v0, v0
; ZVBB-NEXT:    ret
  %res = shufflevector <16 x i1> %a, <16 x i1> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i1> %res
}

define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v32i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    li a0, 32
; NO-ZVBB-NEXT:    csrr a1, vlenb
; NO-ZVBB-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; NO-ZVBB-NEXT:    vid.v v8
; NO-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v10, 0
; NO-ZVBB-NEXT:    addi a2, a1, -1
; NO-ZVBB-NEXT:    slli a1, a1, 1
; NO-ZVBB-NEXT:    vmerge.vim v10, v10, 1, v0
; NO-ZVBB-NEXT:    vsetvli a3, zero, e16, m2, ta, ma
; NO-ZVBB-NEXT:    vrsub.vx v8, v8, a2
; NO-ZVBB-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; NO-ZVBB-NEXT:    vrgatherei16.vv v13, v10, v8
; NO-ZVBB-NEXT:    vrgatherei16.vv v12, v11, v8
; NO-ZVBB-NEXT:    addi a1, a1, -32
; NO-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vx v8, v12, a1
; NO-ZVBB-NEXT:    vmsne.vi v0, v8, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v32i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVBB-NEXT:    vbrev.v v0, v0
; ZVBB-NEXT:    ret
  %res = shufflevector <32 x i1> %a, <32 x i1> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i1> %res
}

define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v64i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    li a0, 64
; NO-ZVBB-NEXT:    csrr a1, vlenb
; NO-ZVBB-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; NO-ZVBB-NEXT:    vid.v v12
; NO-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v8, 0
; NO-ZVBB-NEXT:    addi a2, a1, -1
; NO-ZVBB-NEXT:    slli a1, a1, 2
; NO-ZVBB-NEXT:    vmerge.vim v8, v8, 1, v0
; NO-ZVBB-NEXT:    vsetvli a3, zero, e16, m2, ta, ma
; NO-ZVBB-NEXT:    vrsub.vx v12, v12, a2
; NO-ZVBB-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; NO-ZVBB-NEXT:    vrgatherei16.vv v19, v8, v12
; NO-ZVBB-NEXT:    vrgatherei16.vv v18, v9, v12
; NO-ZVBB-NEXT:    vrgatherei16.vv v17, v10, v12
; NO-ZVBB-NEXT:    vrgatherei16.vv v16, v11, v12
; NO-ZVBB-NEXT:    addi a1, a1, -64
; NO-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vx v8, v16, a1
; NO-ZVBB-NEXT:    vmsne.vi v0, v8, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v64i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; ZVBB-NEXT:    vbrev.v v0, v0
; ZVBB-NEXT:    ret
  %res = shufflevector <64 x i1> %a, <64 x i1> poison, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <64 x i1> %res
}

define <128 x i1> @reverse_v128i1(<128 x i1> %a) {
; CHECK-LABEL: reverse_v128i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 128
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vmv.v.i v16, 0
; CHECK-NEXT:    addi a2, a1, -1
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    vmerge.vim v16, v16, 1, v0
; CHECK-NEXT:    vsetvli a3, zero, e16, m2, ta, ma
; CHECK-NEXT:    vrsub.vx v24, v8, a2
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v15, v16, v24
; CHECK-NEXT:    vrgatherei16.vv v14, v17, v24
; CHECK-NEXT:    vrgatherei16.vv v13, v18, v24
; CHECK-NEXT:    vrgatherei16.vv v12, v19, v24
; CHECK-NEXT:    vrgatherei16.vv v11, v20, v24
; CHECK-NEXT:    vrgatherei16.vv v10, v21, v24
; CHECK-NEXT:    vrgatherei16.vv v9, v22, v24
; CHECK-NEXT:    vrgatherei16.vv v8, v23, v24
; CHECK-NEXT:    addi a1, a1, -128
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret
  %res = shufflevector <128 x i1> %a, <128 x i1> poison, <128 x i32> <i32 127, i32 126, i32 125, i32 124, i32 123, i32 122, i32 121, i32 120, i32 119, i32 118, i32 117, i32 116, i32 115, i32 114, i32 113, i32 112, i32 111, i32 110, i32 109, i32 108, i32 107, i32 106, i32 105, i32 104, i32 103, i32 102, i32 101, i32 100, i32 99, i32 98, i32 97, i32 96, i32 95, i32 94, i32 93, i32 92, i32 91, i32 90, i32 89, i32 88, i32 87, i32 86, i32 85, i32 84, i32 83, i32 82, i32 81, i32 80, i32 79, i32 78, i32 77, i32 76, i32 75, i32 74, i32 73, i32 72, i32 71, i32 70, i32 69, i32 68, i32 67, i32 66, i32 65, i32 64, i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <128 x i1> %res
}

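; Single-source whole-vector reverses. The indices are built with vid.v and
; vrsub (index = N-1-i) and applied with vrgather; sources spanning several
; registers are reversed register-by-register and the result is realigned
; with a vslidedown.vx by a VLEN-dependent amount.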
define <1 x i8> @reverse_v1i8(<1 x i8> %a) {
; CHECK-LABEL: reverse_v1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x i8> %a, <1 x i8> poison, <1 x i32> <i32 0>
  ret <1 x i8> %res
}

define <2 x i8> @reverse_v2i8(<2 x i8> %a) {
; NO-ZVBB-LABEL: reverse_v2i8:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmv1r.v v8, v9
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; ZVBB-NEXT:    vrev8.v v8, v8
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i8> %a, <2 x i8> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i8> %res
}

define <4 x i8> @reverse_v4i8(<4 x i8> %a) {
; CHECK-LABEL: reverse_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 3
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i8> %a, <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i8> %res
}

define <8 x i8> @reverse_v8i8(<8 x i8> %a) {
; CHECK-LABEL: reverse_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 7
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %a, <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i8> %res
}

define <16 x i8> @reverse_v16i8(<16 x i8> %a) {
; CHECK-LABEL: reverse_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 15
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i8> %res
}

define <32 x i8> @reverse_v32i8(<32 x i8> %a) {
; CHECK-LABEL: reverse_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v13, v8, v10
; CHECK-NEXT:    vrgatherei16.vv v12, v9, v10
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i8> %res
}

define <64 x i8> @reverse_v64i8(<64 x i8> %a) {
; CHECK-LABEL: reverse_v64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    addi a0, a0, -64
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v19, v8, v12
; CHECK-NEXT:    vrgatherei16.vv v18, v9, v12
; CHECK-NEXT:    vrgatherei16.vv v17, v10, v12
; CHECK-NEXT:    vrgatherei16.vv v16, v11, v12
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    ret
  %res = shufflevector <64 x i8> %a, <64 x i8> poison, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <64 x i8> %res
}

define <1 x i16> @reverse_v1i16(<1 x i16> %a) {
; CHECK-LABEL: reverse_v1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x i16> %a, <1 x i16> poison, <1 x i32> <i32 0>
  ret <1 x i16> %res
}

define <2 x i16> @reverse_v2i16(<2 x i16> %a) {
; NO-ZVBB-LABEL: reverse_v2i16:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmv1r.v v8, v9
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVBB-NEXT:    vror.vi v8, v8, 16
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i16> %a, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i16> %res
}

define <4 x i16> @reverse_v4i16(<4 x i16> %a) {
; CHECK-LABEL: reverse_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 3
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i16> %a, <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i16> %res
}

define <8 x i16> @reverse_v8i16(<8 x i16> %a) {
; CHECK-LABEL: reverse_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 7
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %res
}

define <16 x i16> @reverse_v16i16(<16 x i16> %a) {
; CHECK-LABEL: reverse_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i16> %res
}

define <32 x i16> @reverse_v32i16(<32 x i16> %a) {
; CHECK-LABEL: reverse_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    vrgather.vv v19, v8, v12
; CHECK-NEXT:    vrgather.vv v18, v9, v12
; CHECK-NEXT:    vrgather.vv v17, v10, v12
; CHECK-NEXT:    vrgather.vv v16, v11, v12
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    ret
  %res = shufflevector <32 x i16> %a, <32 x i16> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i16> %res
}

define <1 x i32> @reverse_v1i32(<1 x i32> %a) {
; CHECK-LABEL: reverse_v1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x i32> %a, <1 x i32> poison, <1 x i32> <i32 0>
  ret <1 x i32> %res
}

define <2 x i32> @reverse_v2i32(<2 x i32> %a) {
; NO-ZVBB-LABEL: reverse_v2i32:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmv1r.v v8, v9
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; ZVBB-NEXT:    vror.vi v8, v8, 32
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i32> %a, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i32> %res
}

define <4 x i32> @reverse_v4i32(<4 x i32> %a) {
; CHECK-LABEL: reverse_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 3
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %res
}

define <8 x i32> @reverse_v8i32(<8 x i32> %a) {
; CHECK-LABEL: reverse_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %res
}

define <16 x i32> @reverse_v16i32(<16 x i32> %a) {
; CHECK-LABEL: reverse_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v16, v12, a1
; CHECK-NEXT:    vrgather.vv v15, v8, v16
; CHECK-NEXT:    vrgather.vv v14, v9, v16
; CHECK-NEXT:    vrgather.vv v13, v10, v16
; CHECK-NEXT:    vrgather.vv v12, v11, v16
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i32> %a, <16 x i32> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i32> %res
}

define <1 x i64> @reverse_v1i64(<1 x i64> %a) {
; CHECK-LABEL: reverse_v1i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x i64> %a, <1 x i64> poison, <1 x i32> <i32 0>
  ret <1 x i64> %res
}

define <2 x i64> @reverse_v2i64(<2 x i64> %a) {
; CHECK-LABEL: reverse_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vslideup.vi v9, v8, 1
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %res
}

define <4 x i64> @reverse_v4i64(<4 x i64> %a) {
; CHECK-LABEL: reverse_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    srli a0, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -4
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i64> %res
}

define <8 x i64> @reverse_v8i64(<8 x i64> %a) {
; CHECK-LABEL: reverse_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    vrgather.vv v19, v8, v12
; CHECK-NEXT:    vrgather.vv v18, v9, v12
; CHECK-NEXT:    vrgather.vv v17, v10, v12
; CHECK-NEXT:    vrgather.vv v16, v11, v12
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i64> %a, <8 x i64> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i64> %res
}

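; Floating-point reverses use the same shuffle lowering as integer vectors of
; the same element size.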
define <1 x half> @reverse_v1f16(<1 x half> %a) {
; CHECK-LABEL: reverse_v1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x half> %a, <1 x half> poison, <1 x i32> <i32 0>
  ret <1 x half> %res
}

define <2 x half> @reverse_v2f16(<2 x half> %a) {
; NO-ZVBB-LABEL: reverse_v2f16:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmv1r.v v8, v9
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2f16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVBB-NEXT:    vror.vi v8, v8, 16
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x half> %res
}

define <4 x half> @reverse_v4f16(<4 x half> %a) {
; CHECK-LABEL: reverse_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 3
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <4 x half> %a, <4 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x half> %res
}

define <8 x half> @reverse_v8f16(<8 x half> %a) {
; CHECK-LABEL: reverse_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 7
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x half> %res
}

define <16 x half> @reverse_v16f16(<16 x half> %a) {
; CHECK-LABEL: reverse_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x half> %res
}

define <32 x half> @reverse_v32f16(<32 x half> %a) {
; CHECK-LABEL: reverse_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    vrgather.vv v19, v8, v12
; CHECK-NEXT:    vrgather.vv v18, v9, v12
; CHECK-NEXT:    vrgather.vv v17, v10, v12
; CHECK-NEXT:    vrgather.vv v16, v11, v12
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    ret
  %res = shufflevector <32 x half> %a, <32 x half> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x half> %res
}

define <1 x float> @reverse_v1f32(<1 x float> %a) {
; CHECK-LABEL: reverse_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x float> %a, <1 x float> poison, <1 x i32> <i32 0>
  ret <1 x float> %res
}

define <2 x float> @reverse_v2f32(<2 x float> %a) {
; NO-ZVBB-LABEL: reverse_v2f32:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmv1r.v v8, v9
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2f32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; ZVBB-NEXT:    vror.vi v8, v8, 32
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x float> %a, <2 x float> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x float> %res
}

define <4 x float> @reverse_v4f32(<4 x float> %a) {
; CHECK-LABEL: reverse_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 3
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %res
}

define <8 x float> @reverse_v8f32(<8 x float> %a) {
; CHECK-LABEL: reverse_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x float> %res
}

define <16 x float> @reverse_v16f32(<16 x float> %a) {
; CHECK-LABEL: reverse_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v16, v12, a1
; CHECK-NEXT:    vrgather.vv v15, v8, v16
; CHECK-NEXT:    vrgather.vv v14, v9, v16
; CHECK-NEXT:    vrgather.vv v13, v10, v16
; CHECK-NEXT:    vrgather.vv v12, v11, v16
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x float> %res
}

define <1 x double> @reverse_v1f64(<1 x double> %a) {
; CHECK-LABEL: reverse_v1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x double> %a, <1 x double> poison, <1 x i32> <i32 0>
  ret <1 x double> %res
}

define <2 x double> @reverse_v2f64(<2 x double> %a) {
; CHECK-LABEL: reverse_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vslideup.vi v9, v8, 1
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %res
}

define <4 x double> @reverse_v4f64(<4 x double> %a) {
; CHECK-LABEL: reverse_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    srli a0, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -4
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %res
}

define <8 x double> @reverse_v8f64(<8 x double> %a) {
; CHECK-LABEL: reverse_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    vrgather.vv v19, v8, v12
; CHECK-NEXT:    vrgather.vv v18, v9, v12
; CHECK-NEXT:    vrgather.vv v17, v10, v12
; CHECK-NEXT:    vrgather.vv v16, v11, v12
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    ret
  %res = shufflevector <8 x double> %a, <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x double> %res
}

define <3 x i64> @reverse_v3i64(<3 x i64> %a) {
; CHECK-LABEL: reverse_v3i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vi v12, v10, 2
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <3 x i64> %a, <3 x i64> poison, <3 x i32> <i32 2, i32 1, i32 0>
  ret <3 x i64> %res
}

define <6 x i64> @reverse_v6i64(<6 x i64> %a) {
; CHECK-LABEL: reverse_v6i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    vrsub.vi v16, v12, 5
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %res = shufflevector <6 x i64> %a, <6 x i64> poison, <6 x i32> <i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <6 x i64> %res
}

define <12 x i64> @reverse_v12i64(<12 x i64> %a) {
; CHECK-LABEL: reverse_v12i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vrsub.vi v24, v16, 11
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v16, v8, v24
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %res = shufflevector <12 x i64> %a, <12 x i64> poison, <12 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <12 x i64> %res
}

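; Two-source cases: the shuffle reverses the concatenation of %a and %b, so
; each source is reversed and the two halves are swapped, either with slides
; or with a second, masked vrgather into the reversed first source.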
define <4 x i8> @reverse_v4i8_2(<2 x i8> %a, <2 x i8> %b) {
; NO-ZVBB-LABEL: reverse_v4i8_2:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslidedown.vi v8, v9, 1
; NO-ZVBB-NEXT:    vslideup.vi v8, v9, 1
; NO-ZVBB-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; NO-ZVBB-NEXT:    vslideup.vi v8, v10, 2
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4i8_2:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; ZVBB-NEXT:    vrev8.v v10, v8
; ZVBB-NEXT:    vrev8.v v8, v9
; ZVBB-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; ZVBB-NEXT:    vslideup.vi v8, v10, 2
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i8> %res
}

define <8 x i8> @reverse_v8i8_2(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: reverse_v8i8_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vid.v v11
; CHECK-NEXT:    vrsub.vi v12, v11, 7
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    vrsub.vi v8, v11, 3
; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i8> %res
}

define <16 x i8> @reverse_v16i8_2(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: reverse_v16i8_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vid.v v11
; CHECK-NEXT:    li a0, 255
; CHECK-NEXT:    vrsub.vi v12, v11, 15
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; CHECK-NEXT:    vrsub.vi v8, v11, 7
; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i8> %res
}

define <32 x i8> @reverse_v32i8_2(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: reverse_v32i8_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    lui a1, 16
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v15, v8, v12
; CHECK-NEXT:    vrgatherei16.vv v14, v9, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vrsub.vi v12, v8, 15
; CHECK-NEXT:    vslidedown.vx v8, v14, a0
; CHECK-NEXT:    vrgather.vv v8, v10, v12, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i8> %res
}

define <4 x i16> @reverse_v4i16_2(<2 x i16> %a, <2 x i16> %b) {
; NO-ZVBB-LABEL: reverse_v4i16_2:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslidedown.vi v8, v9, 1
; NO-ZVBB-NEXT:    vslideup.vi v8, v9, 1
; NO-ZVBB-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; NO-ZVBB-NEXT:    vslideup.vi v8, v10, 2
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4i16_2:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVBB-NEXT:    vror.vi v10, v8, 16
; ZVBB-NEXT:    vror.vi v8, v9, 16
; ZVBB-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVBB-NEXT:    vslideup.vi v8, v10, 2
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i16> %res
}

define <8 x i16> @reverse_v8i16_2(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: reverse_v8i16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vid.v v11
; CHECK-NEXT:    vrsub.vi v12, v11, 7
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    vrsub.vi v8, v11, 3
; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %res
}

define <16 x i16> @reverse_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: reverse_v16i16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v9, v9, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v9
; CHECK-NEXT:    vrgather.vv v12, v11, v9
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a1, 255
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vrsub.vi v14, v8, 7
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    vrgather.vv v8, v10, v14, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i16> %res
}

define <32 x i16> @reverse_v32i16_2(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: reverse_v32i16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv2r.v v12, v10
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    lui a1, 16
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    vrgather.vv v19, v8, v10
; CHECK-NEXT:    vrgather.vv v18, v9, v10
; CHECK-NEXT:    vrgather.vv v16, v11, v10
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vrsub.vi v20, v8, 15
; CHECK-NEXT:    vmv1r.v v17, v16
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    vrgather.vv v8, v12, v20, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i16> %res
}

define <4 x i32> @reverse_v4i32_2(<2 x i32> %a, <2 x i32> %b) {
; NO-ZVBB-LABEL: reverse_v4i32_2:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslidedown.vi v8, v9, 1
; NO-ZVBB-NEXT:    vslideup.vi v8, v9, 1
; NO-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; NO-ZVBB-NEXT:    vslideup.vi v8, v10, 2
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4i32_2:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; ZVBB-NEXT:    vror.vi v10, v8, 32
; ZVBB-NEXT:    vror.vi v8, v9, 32
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vslideup.vi v8, v10, 2
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %res
}

define <8 x i32> @reverse_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: reverse_v8i32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v9, v9, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v9
; CHECK-NEXT:    vrgather.vv v12, v11, v9
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    vrsub.vi v14, v8, 3
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v10, v14, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %res
}

define <16 x i32> @reverse_v16i32_2(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: reverse_v16i32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv2r.v v12, v10
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v14, v10, a1
; CHECK-NEXT:    vrgather.vv v11, v8, v14
; CHECK-NEXT:    vrgather.vv v10, v9, v14
; CHECK-NEXT:    vrgather.vv v8, v9, v14
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v14
; CHECK-NEXT:    li a1, 255
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vrsub.vi v16, v14, 7
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i32> %res
}

define <32 x i32> @reverse_v32i32_2(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: reverse_v32i32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv4r.v v16, v12
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v20, v12, a1
; CHECK-NEXT:    vrgather.vv v15, v8, v20
; CHECK-NEXT:    vrgather.vv v14, v9, v20
; CHECK-NEXT:    vrgather.vv v13, v10, v20
; CHECK-NEXT:    vrgather.vv v12, v11, v20
; CHECK-NEXT:    lui a1, 16
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    vrgather.vv v8, v9, v20
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vid.v v20
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vrsub.vi v24, v20, 15
; CHECK-NEXT:    vmv2r.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v16, v24, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i32> %res
}

define <4 x i64> @reverse_v4i64_2(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: reverse_v4i64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 1
; CHECK-NEXT:    vslideup.vi v10, v8, 1
; CHECK-NEXT:    vslidedown.vi v8, v9, 1
; CHECK-NEXT:    vslideup.vi v8, v9, 1
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 2
; CHECK-NEXT:    ret
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i64> %res
}

define <8 x i64> @reverse_v8i64_2(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: reverse_v8i64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vmv2r.v v12, v10
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v14, v10, a1
; CHECK-NEXT:    vrgather.vv v11, v8, v14
; CHECK-NEXT:    vrgather.vv v10, v9, v14
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v15
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vv v8, v9, v14
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vrsub.vi v16, v15, 3
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i64> %res
}

define <4 x half> @reverse_v4f16_2(<2 x half> %a, <2 x half> %b) {
; NO-ZVBB-LABEL: reverse_v4f16_2:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslidedown.vi v8, v9, 1
; NO-ZVBB-NEXT:    vslideup.vi v8, v9, 1
; NO-ZVBB-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; NO-ZVBB-NEXT:    vslideup.vi v8, v10, 2
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4f16_2:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVBB-NEXT:    vror.vi v10, v8, 16
; ZVBB-NEXT:    vror.vi v8, v9, 16
; ZVBB-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVBB-NEXT:    vslideup.vi v8, v10, 2
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x half> %a, <2 x half> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x half> %res
}

define <8 x half> @reverse_v8f16_2(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: reverse_v8f16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vid.v v11
; CHECK-NEXT:    vrsub.vi v12, v11, 7
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    vrsub.vi v8, v11, 3
; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x half> %res
}

define <16 x half> @reverse_v16f16_2(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: reverse_v16f16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v9, v9, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v9
; CHECK-NEXT:    vrgather.vv v12, v11, v9
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a1, 255
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vrsub.vi v14, v8, 7
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    vrgather.vv v8, v10, v14, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <8 x half> %a, <8 x half> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x half> %res
}

define <32 x half> @reverse_v32f16_2(<16 x half> %a) {
; CHECK-LABEL: reverse_v32f16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v12, v10, a1
; CHECK-NEXT:    vrgather.vv v11, v8, v12
; CHECK-NEXT:    vrgather.vv v10, v9, v12
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    vrgather.vv v8, v9, v12
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vmv.v.v v9, v8
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    ret
  %res = shufflevector <16 x half> %a, <16 x half> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x half> %res
}

define <4 x float> @reverse_v4f32_2(<2 x float> %a, <2 x float> %b) {
; NO-ZVBB-LABEL: reverse_v4f32_2:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslidedown.vi v8, v9, 1
; NO-ZVBB-NEXT:    vslideup.vi v8, v9, 1
; NO-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; NO-ZVBB-NEXT:    vslideup.vi v8, v10, 2
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4f32_2:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; ZVBB-NEXT:    vror.vi v10, v8, 32
; ZVBB-NEXT:    vror.vi v8, v9, 32
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vslideup.vi v8, v10, 2
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %res
}

define <8 x float> @reverse_v8f32_2(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: reverse_v8f32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v9, v9, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v9
; CHECK-NEXT:    vrgather.vv v12, v11, v9
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    vrsub.vi v14, v8, 3
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v10, v14, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x float> %res
}

define <16 x float> @reverse_v16f32_2(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: reverse_v16f32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv2r.v v12, v10
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v14, v10, a1
; CHECK-NEXT:    vrgather.vv v11, v8, v14
; CHECK-NEXT:    vrgather.vv v10, v9, v14
; CHECK-NEXT:    vrgather.vv v8, v9, v14
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v14
; CHECK-NEXT:    li a1, 255
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vrsub.vi v16, v14, 7
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x float> %res
}

define <4 x double> @reverse_v4f64_2(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: reverse_v4f64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 1
; CHECK-NEXT:    vslideup.vi v10, v8, 1
; CHECK-NEXT:    vslidedown.vi v8, v9, 1
; CHECK-NEXT:    vslideup.vi v8, v9, 1
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 2
; CHECK-NEXT:    ret
  %res = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %res
}

define <8 x double> @reverse_v8f64_2(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: reverse_v8f64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vmv2r.v v12, v10
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v14, v10, a1
; CHECK-NEXT:    vrgather.vv v11, v8, v14
; CHECK-NEXT:    vrgather.vv v10, v9, v14
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v15
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vv v8, v9, v14
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vrsub.vi v16, v15, 3
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x double> %res
}

; There is no corresponding v1i256 type, so make sure we don't crash if we try
; to lower via lowerBitreverseShuffle.
define <256 x i1> @reverse_v256i1(<256 x i1> %a) vscale_range(16, 1024) {
; CHECK-LABEL: reverse_v256i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 256
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    addi a2, a1, -1
; CHECK-NEXT:    slli a1, a1, 1
; CHECK-NEXT:    vmerge.vim v10, v10, 1, v0
; CHECK-NEXT:    vsetvli a3, zero, e16, m2, ta, ma
; CHECK-NEXT:    vrsub.vx v8, v8, a2
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v13, v10, v8
; CHECK-NEXT:    vrgatherei16.vv v12, v11, v8
; CHECK-NEXT:    addi a1, a1, -256
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a1
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret
  %res = shufflevector <256 x i1> %a, <256 x i1> poison, <256 x i32> <i32 255, i32 254, i32 253, i32 252, i32 251, i32 250, i32 249, i32 248, i32 247, i32 246, i32 245, i32 244, i32 243, i32 242, i32 241, i32 240, i32 239, i32 238, i32 237, i32 236, i32 235, i32 234, i32 233, i32 232, i32 231, i32 230, i32 229, i32 228, i32 227, i32 226, i32 225, i32 224, i32 223, i32 222, i32 221, i32 220, i32 219, i32 218, i32 217, i32 216, i32 215, i32 214, i32 213, i32 212, i32 211, i32 210, i32 209, i32 208, i32 207, i32 206, i32 205, i32 204, i32 203, i32 202, i32 201, i32 200, i32 199, i32 198, i32 197, i32 196, i32 195, i32 194, i32 193, i32 192, i32 191, i32 190, i32 189, i32 188, i32 187, i32 186, i32 185, i32 184, i32 183, i32 182, i32 181, i32 180, i32 179, i32 178, i32 177, i32 176, i32 175, i32 174, i32 173, i32 172, i32 171, i32 170, i32 169, i32 168, i32 167, i32 166, i32 165, i32 164, i32 163, i32 162, i32 161, i32 160, i32 159, i32 158, i32 157, i32 156, i32 155, i32 154, i32 153, i32 152, i32 151, i32 150, i32 149, i32 148, i32 147, i32 146, i32 145, i32 144, i32 143, i32 142, i32 141, i32 140, i32 139, i32 138, i32 137, i32 136, i32 135, i32 134, i32 133, i32 132, i32 131, i32 130, i32 129, i32 128, i32 127, i32 126, i32 125, i32 124, i32 123, i32 122, i32 121, i32 120, i32 119, i32 118, i32 117, i32 116, i32 115, i32 114, i32 113, i32 112, i32 111, i32 110, i32 109, i32 108, i32 107, i32 106, i32 105, i32 104, i32 103, i32 102, i32 101, i32 100, i32 99, i32 98, i32 97, i32 96, i32 95, i32 94, i32 93, i32 92, i32 91, i32 90, i32 89, i32 88, i32 87, i32 86, i32 85, i32 84, i32 83, i32 82, i32 81, i32 80, i32 79, i32 78, i32 77, i32 76, i32 75, i32 74, i32 73, i32 72, i32 71, i32 70, i32 69, i32 68, i32 67, i32 66, i32 65, i32 64, i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <256 x i1> %res
}

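; With an exact VLEN known, the reverse needs no vslidedown fixup: each source
; register is reversed in place and the register order is swapped.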
define <8 x i32> @reverse_v8i32_exact_vlen_128(<8 x i32> %a) vscale_range(2, 2) {
; CHECK-LABEL: reverse_v8i32_exact_vlen_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vi v12, v10, 3
; CHECK-NEXT:    vrgather.vv v11, v8, v12
; CHECK-NEXT:    vrgather.vv v10, v9, v12
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %res
}

define <16 x i32> @reverse_v16i32_exact_vlen_256(<16 x i32> %a) vscale_range(4, 4) {
; CHECK-LABEL: reverse_v16i32_exact_vlen_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vi v12, v10, 7
; CHECK-NEXT:    vrgather.vv v11, v8, v12
; CHECK-NEXT:    vrgather.vv v10, v9, v12
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i32> %a, <16 x i32> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i32> %res
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: