; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVBB,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVBB,RV64
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVBB,RV32-ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVBB,RV64-ZVBB
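
; This file tests lowering of fixed-vector reverse (descending-index)
; shuffles. Without Zvbb they lower to vrgather with a vid.v/vrsub index
; vector, split per m1 register when LMUL > 1; with Zvbb, small mask and
; two-element reverses can use vbrev.v, vrev8.v, or vror.vi instead.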
define <2 x i1> @reverse_v2i1(<2 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v2i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v8, 0
; NO-ZVBB-NEXT:    vmerge.vim v8, v8, 1, v0
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmsne.vi v0, v9, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; ZVBB-NEXT:    vbrev.v v8, v0
; ZVBB-NEXT:    vsrl.vi v0, v8, 6
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i1> %res
}
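; With Zvbb the i1 mask is reversed directly in v0 as a bit vector: vbrev.v
; reverses all 8 bits of the byte, leaving the two live mask bits at
; positions 7:6, and the vsrl.vi by 6 realigns them to positions 1:0. Once
; the mask has 8 or more elements (reverse_v8i1 and up), vbrev.v alone is
; enough.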
define <4 x i1> @reverse_v4i1(<4 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v4i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v8, 0
; NO-ZVBB-NEXT:    vmerge.vim v8, v8, 1, v0
; NO-ZVBB-NEXT:    vid.v v9
; NO-ZVBB-NEXT:    vrsub.vi v9, v9, 3
; NO-ZVBB-NEXT:    vrgather.vv v10, v8, v9
; NO-ZVBB-NEXT:    vmsne.vi v0, v10, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; ZVBB-NEXT:    vbrev.v v8, v0
; ZVBB-NEXT:    vsrl.vi v0, v8, 4
; ZVBB-NEXT:    ret
  %res = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i1> %res
}

define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v8i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v8, 0
; NO-ZVBB-NEXT:    vmerge.vim v8, v8, 1, v0
; NO-ZVBB-NEXT:    vid.v v9
; NO-ZVBB-NEXT:    vrsub.vi v9, v9, 7
; NO-ZVBB-NEXT:    vrgather.vv v10, v8, v9
; NO-ZVBB-NEXT:    vmsne.vi v0, v10, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v8i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; ZVBB-NEXT:    vbrev.v v0, v0
; ZVBB-NEXT:    ret
  %res = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i1> %res
}

define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v16i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v8, 0
; NO-ZVBB-NEXT:    vmerge.vim v8, v8, 1, v0
; NO-ZVBB-NEXT:    vid.v v9
; NO-ZVBB-NEXT:    vrsub.vi v9, v9, 15
; NO-ZVBB-NEXT:    vrgather.vv v10, v8, v9
; NO-ZVBB-NEXT:    vmsne.vi v0, v10, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v16i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; ZVBB-NEXT:    vbrev.v v0, v0
; ZVBB-NEXT:    ret
  %res = shufflevector <16 x i1> %a, <16 x i1> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i1> %res
}
define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v32i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    li a0, 32
; NO-ZVBB-NEXT:    csrr a1, vlenb
; NO-ZVBB-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; NO-ZVBB-NEXT:    vid.v v8
; NO-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v10, 0
; NO-ZVBB-NEXT:    addi a2, a1, -1
; NO-ZVBB-NEXT:    slli a1, a1, 1
; NO-ZVBB-NEXT:    vmerge.vim v10, v10, 1, v0
; NO-ZVBB-NEXT:    vsetvli a3, zero, e16, m2, ta, ma
; NO-ZVBB-NEXT:    vrsub.vx v8, v8, a2
; NO-ZVBB-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; NO-ZVBB-NEXT:    vrgatherei16.vv v13, v10, v8
; NO-ZVBB-NEXT:    vrgatherei16.vv v12, v11, v8
; NO-ZVBB-NEXT:    addi a1, a1, -32
; NO-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vx v8, v12, a1
; NO-ZVBB-NEXT:    vmsne.vi v0, v8, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v32i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVBB-NEXT:    vbrev.v v0, v0
; ZVBB-NEXT:    ret
  %res = shufflevector <32 x i1> %a, <32 x i1> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i1> %res
}

define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v64i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    li a0, 64
; NO-ZVBB-NEXT:    csrr a1, vlenb
; NO-ZVBB-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; NO-ZVBB-NEXT:    vid.v v12
; NO-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v8, 0
; NO-ZVBB-NEXT:    addi a2, a1, -1
; NO-ZVBB-NEXT:    slli a1, a1, 2
; NO-ZVBB-NEXT:    vmerge.vim v8, v8, 1, v0
; NO-ZVBB-NEXT:    vsetvli a3, zero, e16, m2, ta, ma
; NO-ZVBB-NEXT:    vrsub.vx v12, v12, a2
; NO-ZVBB-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; NO-ZVBB-NEXT:    vrgatherei16.vv v19, v8, v12
; NO-ZVBB-NEXT:    vrgatherei16.vv v18, v9, v12
; NO-ZVBB-NEXT:    vrgatherei16.vv v17, v10, v12
; NO-ZVBB-NEXT:    vrgatherei16.vv v16, v11, v12
; NO-ZVBB-NEXT:    addi a1, a1, -64
; NO-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vx v8, v16, a1
; NO-ZVBB-NEXT:    vmsne.vi v0, v8, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v64i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; ZVBB-NEXT:    vbrev.v v0, v0
; ZVBB-NEXT:    ret
  %res = shufflevector <64 x i1> %a, <64 x i1> poison, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <64 x i1> %res
}
define <128 x i1> @reverse_v128i1(<128 x i1> %a) {
; CHECK-LABEL: reverse_v128i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 128
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vmv.v.i v16, 0
; CHECK-NEXT:    addi a2, a1, -1
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    vmerge.vim v16, v16, 1, v0
; CHECK-NEXT:    vsetvli a3, zero, e16, m2, ta, ma
; CHECK-NEXT:    vrsub.vx v24, v8, a2
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v15, v16, v24
; CHECK-NEXT:    vrgatherei16.vv v14, v17, v24
; CHECK-NEXT:    vrgatherei16.vv v13, v18, v24
; CHECK-NEXT:    vrgatherei16.vv v12, v19, v24
; CHECK-NEXT:    vrgatherei16.vv v11, v20, v24
; CHECK-NEXT:    vrgatherei16.vv v10, v21, v24
; CHECK-NEXT:    vrgatherei16.vv v9, v22, v24
; CHECK-NEXT:    vrgatherei16.vv v8, v23, v24
; CHECK-NEXT:    addi a1, a1, -128
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret
  %res = shufflevector <128 x i1> %a, <128 x i1> poison, <128 x i32> <i32 127, i32 126, i32 125, i32 124, i32 123, i32 122, i32 121, i32 120, i32 119, i32 118, i32 117, i32 116, i32 115, i32 114, i32 113, i32 112, i32 111, i32 110, i32 109, i32 108, i32 107, i32 106, i32 105, i32 104, i32 103, i32 102, i32 101, i32 100, i32 99, i32 98, i32 97, i32 96, i32 95, i32 94, i32 93, i32 92, i32 91, i32 90, i32 89, i32 88, i32 87, i32 86, i32 85, i32 84, i32 83, i32 82, i32 81, i32 80, i32 79, i32 78, i32 77, i32 76, i32 75, i32 74, i32 73, i32 72, i32 71, i32 70, i32 69, i32 68, i32 67, i32 66, i32 65, i32 64, i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <128 x i1> %res
}
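; Element reverses: the gather indices come from vid.v/vrsub.vx using vlenb,
; so each m1 register of the source is reversed into the destination group in
; reverse order, and a final vslidedown.vx drops the unused tail of the
; register group (vlen-in-elements minus the fixed vector length).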
define <1 x i8> @reverse_v1i8(<1 x i8> %a) {
; CHECK-LABEL: reverse_v1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x i8> %a, <1 x i8> poison, <1 x i32> <i32 0>
  ret <1 x i8> %res
}

define <2 x i8> @reverse_v2i8(<2 x i8> %a) {
; NO-ZVBB-LABEL: reverse_v2i8:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmv1r.v v8, v9
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; ZVBB-NEXT:    vrev8.v v8, v8
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i8> %a, <2 x i8> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i8> %res
}
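; A <2 x i8> reverse is a byte swap of a single 16-bit element, so Zvbb uses
; vrev8.v; the <2 x i16> and <2 x i32> cases below likewise become a single
; vror.vi of the doubled-width element by 16 or 32 bits.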
define <4 x i8> @reverse_v4i8(<4 x i8> %a) {
; CHECK-LABEL: reverse_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 3
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i8> %a, <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i8> %res
}

define <8 x i8> @reverse_v8i8(<8 x i8> %a) {
; CHECK-LABEL: reverse_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 7
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %a, <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i8> %res
}

define <16 x i8> @reverse_v16i8(<16 x i8> %a) {
; CHECK-LABEL: reverse_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 15
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i8> %res
}

define <32 x i8> @reverse_v32i8(<32 x i8> %a) {
; CHECK-LABEL: reverse_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v13, v8, v10
; CHECK-NEXT:    vrgatherei16.vv v12, v9, v10
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i8> %res
}

define <64 x i8> @reverse_v64i8(<64 x i8> %a) {
; CHECK-LABEL: reverse_v64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    addi a0, a0, -64
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v19, v8, v12
; CHECK-NEXT:    vrgatherei16.vv v18, v9, v12
; CHECK-NEXT:    vrgatherei16.vv v17, v10, v12
; CHECK-NEXT:    vrgatherei16.vv v16, v11, v12
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    ret
  %res = shufflevector <64 x i8> %a, <64 x i8> poison, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <64 x i8> %res
}
define <1 x i16> @reverse_v1i16(<1 x i16> %a) {
; CHECK-LABEL: reverse_v1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x i16> %a, <1 x i16> poison, <1 x i32> <i32 0>
  ret <1 x i16> %res
}

define <2 x i16> @reverse_v2i16(<2 x i16> %a) {
; NO-ZVBB-LABEL: reverse_v2i16:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmv1r.v v8, v9
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVBB-NEXT:    vror.vi v8, v8, 16
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i16> %a, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i16> %res
}

define <4 x i16> @reverse_v4i16(<4 x i16> %a) {
; CHECK-LABEL: reverse_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 3
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i16> %a, <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i16> %res
}

define <8 x i16> @reverse_v8i16(<8 x i16> %a) {
; CHECK-LABEL: reverse_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 7
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %res
}

define <16 x i16> @reverse_v16i16(<16 x i16> %a) {
; CHECK-LABEL: reverse_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i16> %res
}

define <32 x i16> @reverse_v32i16(<32 x i16> %a) {
; CHECK-LABEL: reverse_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    vrgather.vv v19, v8, v12
; CHECK-NEXT:    vrgather.vv v18, v9, v12
; CHECK-NEXT:    vrgather.vv v17, v10, v12
; CHECK-NEXT:    vrgather.vv v16, v11, v12
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    ret
  %res = shufflevector <32 x i16> %a, <32 x i16> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i16> %res
}
define <1 x i32> @reverse_v1i32(<1 x i32> %a) {
; CHECK-LABEL: reverse_v1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x i32> %a, <1 x i32> poison, <1 x i32> <i32 0>
  ret <1 x i32> %res
}

define <2 x i32> @reverse_v2i32(<2 x i32> %a) {
; NO-ZVBB-LABEL: reverse_v2i32:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmv1r.v v8, v9
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; ZVBB-NEXT:    vror.vi v8, v8, 32
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i32> %a, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i32> %res
}

define <4 x i32> @reverse_v4i32(<4 x i32> %a) {
; CHECK-LABEL: reverse_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 3
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %res
}

define <8 x i32> @reverse_v8i32(<8 x i32> %a) {
; CHECK-LABEL: reverse_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %res
}

define <16 x i32> @reverse_v16i32(<16 x i32> %a) {
; CHECK-LABEL: reverse_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v16, v12, a1
; CHECK-NEXT:    vrgather.vv v15, v8, v16
; CHECK-NEXT:    vrgather.vv v14, v9, v16
; CHECK-NEXT:    vrgather.vv v13, v10, v16
; CHECK-NEXT:    vrgather.vv v12, v11, v16
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i32> %a, <16 x i32> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i32> %res
}
define <1 x i64> @reverse_v1i64(<1 x i64> %a) {
; CHECK-LABEL: reverse_v1i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x i64> %a, <1 x i64> poison, <1 x i32> <i32 0>
  ret <1 x i64> %res
}

define <2 x i64> @reverse_v2i64(<2 x i64> %a) {
; CHECK-LABEL: reverse_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vslideup.vi v9, v8, 1
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %res
}

define <4 x i64> @reverse_v4i64(<4 x i64> %a) {
; CHECK-LABEL: reverse_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    srli a0, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -4
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i64> %res
}

define <8 x i64> @reverse_v8i64(<8 x i64> %a) {
; CHECK-LABEL: reverse_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    vrgather.vv v19, v8, v12
; CHECK-NEXT:    vrgather.vv v18, v9, v12
; CHECK-NEXT:    vrgather.vv v17, v10, v12
; CHECK-NEXT:    vrgather.vv v16, v11, v12
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i64> %a, <8 x i64> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i64> %res
}
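; Floating-point reverses lower identically to the integer reverses of the
; same element width; only the IR element type differs.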
define <1 x half> @reverse_v1f16(<1 x half> %a) {
; CHECK-LABEL: reverse_v1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x half> %a, <1 x half> poison, <1 x i32> <i32 0>
  ret <1 x half> %res
}

define <2 x half> @reverse_v2f16(<2 x half> %a) {
; NO-ZVBB-LABEL: reverse_v2f16:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmv1r.v v8, v9
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2f16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVBB-NEXT:    vror.vi v8, v8, 16
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x half> %res
}

define <4 x half> @reverse_v4f16(<4 x half> %a) {
; CHECK-LABEL: reverse_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 3
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <4 x half> %a, <4 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x half> %res
}

define <8 x half> @reverse_v8f16(<8 x half> %a) {
; CHECK-LABEL: reverse_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 7
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x half> %res
}

define <16 x half> @reverse_v16f16(<16 x half> %a) {
; CHECK-LABEL: reverse_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x half> %res
}

define <32 x half> @reverse_v32f16(<32 x half> %a) {
; CHECK-LABEL: reverse_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    vrgather.vv v19, v8, v12
; CHECK-NEXT:    vrgather.vv v18, v9, v12
; CHECK-NEXT:    vrgather.vv v17, v10, v12
; CHECK-NEXT:    vrgather.vv v16, v11, v12
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    ret
  %res = shufflevector <32 x half> %a, <32 x half> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x half> %res
}
define <1 x float> @reverse_v1f32(<1 x float> %a) {
; CHECK-LABEL: reverse_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x float> %a, <1 x float> poison, <1 x i32> <i32 0>
  ret <1 x float> %res
}

define <2 x float> @reverse_v2f32(<2 x float> %a) {
; NO-ZVBB-LABEL: reverse_v2f32:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmv1r.v v8, v9
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2f32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; ZVBB-NEXT:    vror.vi v8, v8, 32
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x float> %a, <2 x float> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x float> %res
}

define <4 x float> @reverse_v4f32(<4 x float> %a) {
; CHECK-LABEL: reverse_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 3
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %res
}

define <8 x float> @reverse_v8f32(<8 x float> %a) {
; CHECK-LABEL: reverse_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x float> %res
}

define <16 x float> @reverse_v16f32(<16 x float> %a) {
; CHECK-LABEL: reverse_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v16, v12, a1
; CHECK-NEXT:    vrgather.vv v15, v8, v16
; CHECK-NEXT:    vrgather.vv v14, v9, v16
; CHECK-NEXT:    vrgather.vv v13, v10, v16
; CHECK-NEXT:    vrgather.vv v12, v11, v16
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x float> %res
}
define <1 x double> @reverse_v1f64(<1 x double> %a) {
; CHECK-LABEL: reverse_v1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x double> %a, <1 x double> poison, <1 x i32> <i32 0>
  ret <1 x double> %res
}

define <2 x double> @reverse_v2f64(<2 x double> %a) {
; CHECK-LABEL: reverse_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vslideup.vi v9, v8, 1
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %res
}

define <4 x double> @reverse_v4f64(<4 x double> %a) {
; CHECK-LABEL: reverse_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    srli a0, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -4
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %res
}

define <8 x double> @reverse_v8f64(<8 x double> %a) {
; CHECK-LABEL: reverse_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    vrgather.vv v19, v8, v12
; CHECK-NEXT:    vrgather.vv v18, v9, v12
; CHECK-NEXT:    vrgather.vv v17, v10, v12
; CHECK-NEXT:    vrgather.vv v16, v11, v12
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    ret
  %res = shufflevector <8 x double> %a, <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x double> %res
}
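; Non-power-of-two element counts. On RV32 the i64 gather indices are loaded
; from a constant pool with vle16.v and applied at e32 (each i64 element is
; moved as two 32-bit halves); on RV64 they are built with vid.v/vrsub.vi and
; the gather runs at e64.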
define <3 x i64> @reverse_v3i64(<3 x i64> %a) {
; RV32-LABEL: reverse_v3i64:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(.LCPI44_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI44_0)
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vle16.v v12, (a0)
; RV32-NEXT:    vrgatherei16.vv v10, v8, v12
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: reverse_v3i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV64-NEXT:    vid.v v10
; RV64-NEXT:    vrsub.vi v12, v10, 2
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT:    vrgatherei16.vv v10, v8, v12
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
;
; RV32-ZVBB-LABEL: reverse_v3i64:
; RV32-ZVBB:       # %bb.0:
; RV32-ZVBB-NEXT:    lui a0, %hi(.LCPI44_0)
; RV32-ZVBB-NEXT:    addi a0, a0, %lo(.LCPI44_0)
; RV32-ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-ZVBB-NEXT:    vle16.v v12, (a0)
; RV32-ZVBB-NEXT:    vrgatherei16.vv v10, v8, v12
; RV32-ZVBB-NEXT:    vmv.v.v v8, v10
; RV32-ZVBB-NEXT:    ret
;
; RV64-ZVBB-LABEL: reverse_v3i64:
; RV64-ZVBB:       # %bb.0:
; RV64-ZVBB-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV64-ZVBB-NEXT:    vid.v v10
; RV64-ZVBB-NEXT:    vrsub.vi v12, v10, 2
; RV64-ZVBB-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV64-ZVBB-NEXT:    vrgatherei16.vv v10, v8, v12
; RV64-ZVBB-NEXT:    vmv.v.v v8, v10
; RV64-ZVBB-NEXT:    ret
  %res = shufflevector <3 x i64> %a, <3 x i64> poison, <3 x i32> <i32 2, i32 1, i32 0>
  ret <3 x i64> %res
}
define <6 x i64> @reverse_v6i64(<6 x i64> %a) {
; RV32-LABEL: reverse_v6i64:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(.LCPI45_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI45_0)
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vle16.v v16, (a0)
; RV32-NEXT:    vrgatherei16.vv v12, v8, v16
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: reverse_v6i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vid.v v12
; RV64-NEXT:    vrsub.vi v16, v12, 5
; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV64-NEXT:    vrgatherei16.vv v12, v8, v16
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
;
; RV32-ZVBB-LABEL: reverse_v6i64:
; RV32-ZVBB:       # %bb.0:
; RV32-ZVBB-NEXT:    lui a0, %hi(.LCPI45_0)
; RV32-ZVBB-NEXT:    addi a0, a0, %lo(.LCPI45_0)
; RV32-ZVBB-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-ZVBB-NEXT:    vle16.v v16, (a0)
; RV32-ZVBB-NEXT:    vrgatherei16.vv v12, v8, v16
; RV32-ZVBB-NEXT:    vmv.v.v v8, v12
; RV32-ZVBB-NEXT:    ret
;
; RV64-ZVBB-LABEL: reverse_v6i64:
; RV64-ZVBB:       # %bb.0:
; RV64-ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-ZVBB-NEXT:    vid.v v12
; RV64-ZVBB-NEXT:    vrsub.vi v16, v12, 5
; RV64-ZVBB-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV64-ZVBB-NEXT:    vrgatherei16.vv v12, v8, v16
; RV64-ZVBB-NEXT:    vmv.v.v v8, v12
; RV64-ZVBB-NEXT:    ret
  %res = shufflevector <6 x i64> %a, <6 x i64> poison, <6 x i32> <i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <6 x i64> %res
}
define <12 x i64> @reverse_v12i64(<12 x i64> %a) {
; RV32-LABEL: reverse_v12i64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    lui a1, %hi(.LCPI46_0)
; RV32-NEXT:    addi a1, a1, %lo(.LCPI46_0)
; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; RV32-NEXT:    vle16.v v24, (a1)
; RV32-NEXT:    vrgatherei16.vv v16, v8, v24
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: reverse_v12i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT:    vid.v v16
; RV64-NEXT:    vrsub.vi v24, v16, 11
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vrgatherei16.vv v16, v8, v24
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
;
; RV32-ZVBB-LABEL: reverse_v12i64:
; RV32-ZVBB:       # %bb.0:
; RV32-ZVBB-NEXT:    li a0, 32
; RV32-ZVBB-NEXT:    lui a1, %hi(.LCPI46_0)
; RV32-ZVBB-NEXT:    addi a1, a1, %lo(.LCPI46_0)
; RV32-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; RV32-ZVBB-NEXT:    vle16.v v24, (a1)
; RV32-ZVBB-NEXT:    vrgatherei16.vv v16, v8, v24
; RV32-ZVBB-NEXT:    vmv.v.v v8, v16
; RV32-ZVBB-NEXT:    ret
;
; RV64-ZVBB-LABEL: reverse_v12i64:
; RV64-ZVBB:       # %bb.0:
; RV64-ZVBB-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RV64-ZVBB-NEXT:    vid.v v16
; RV64-ZVBB-NEXT:    vrsub.vi v24, v16, 11
; RV64-ZVBB-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-ZVBB-NEXT:    vrgatherei16.vv v16, v8, v24
; RV64-ZVBB-NEXT:    vmv.v.v v8, v16
; RV64-ZVBB-NEXT:    ret
  %res = shufflevector <12 x i64> %a, <12 x i64> poison, <12 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <12 x i64> %res
}
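; Two-source reverses: each operand is reversed separately and the halves are
; recombined, either with vslideup.vi for the short vectors or by gathering
; the second source into the reversed first under a mask (vrgather ... v0.t).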
define <4 x i8> @reverse_v4i8_2(<2 x i8> %a, <2 x i8> %b) {
; NO-ZVBB-LABEL: reverse_v4i8_2:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslidedown.vi v8, v9, 1
; NO-ZVBB-NEXT:    vslideup.vi v8, v9, 1
; NO-ZVBB-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; NO-ZVBB-NEXT:    vslideup.vi v8, v10, 2
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4i8_2:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; ZVBB-NEXT:    vrev8.v v10, v8
; ZVBB-NEXT:    vrev8.v v8, v9
; ZVBB-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; ZVBB-NEXT:    vslideup.vi v8, v10, 2
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i8> %res
}

define <8 x i8> @reverse_v8i8_2(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: reverse_v8i8_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vid.v v11
; CHECK-NEXT:    vrsub.vi v12, v11, 7
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    vrsub.vi v8, v11, 3
; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i8> %res
}

define <16 x i8> @reverse_v16i8_2(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: reverse_v16i8_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vid.v v11
; CHECK-NEXT:    li a0, 255
; CHECK-NEXT:    vrsub.vi v12, v11, 15
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; CHECK-NEXT:    vrsub.vi v8, v11, 7
; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i8> %res
}
define <32 x i8> @reverse_v32i8_2(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: reverse_v32i8_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    lui a1, 16
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v15, v8, v12
; CHECK-NEXT:    vrgatherei16.vv v14, v9, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vrsub.vi v12, v8, 15
; CHECK-NEXT:    vslidedown.vx v8, v14, a0
; CHECK-NEXT:    vrgather.vv v8, v10, v12, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i8> %res
}
define <4 x i16> @reverse_v4i16_2(<2 x i16> %a, <2 x i16> %b) {
; NO-ZVBB-LABEL: reverse_v4i16_2:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslidedown.vi v8, v9, 1
; NO-ZVBB-NEXT:    vslideup.vi v8, v9, 1
; NO-ZVBB-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; NO-ZVBB-NEXT:    vslideup.vi v8, v10, 2
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4i16_2:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVBB-NEXT:    vror.vi v10, v8, 16
; ZVBB-NEXT:    vror.vi v8, v9, 16
; ZVBB-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVBB-NEXT:    vslideup.vi v8, v10, 2
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i16> %res
}

define <8 x i16> @reverse_v8i16_2(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: reverse_v8i16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vid.v v11
; CHECK-NEXT:    vrsub.vi v12, v11, 7
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    vrsub.vi v8, v11, 3
; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %res
}

define <16 x i16> @reverse_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: reverse_v16i16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v9, v9, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v9
; CHECK-NEXT:    vrgather.vv v12, v11, v9
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a1, 255
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vrsub.vi v14, v8, 7
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    vrgather.vv v8, v10, v14, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i16> %res
}
define <32 x i16> @reverse_v32i16_2(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: reverse_v32i16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv2r.v v12, v10
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    lui a1, 16
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    vrgather.vv v19, v8, v10
; CHECK-NEXT:    vrgather.vv v18, v9, v10
; CHECK-NEXT:    vrgather.vv v16, v11, v10
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vrsub.vi v20, v8, 15
; CHECK-NEXT:    vmv1r.v v17, v16
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    vrgather.vv v8, v12, v20, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i16> %res
}
define <4 x i32> @reverse_v4i32_2(<2 x i32> %a, <2 x i32> %b) {
; NO-ZVBB-LABEL: reverse_v4i32_2:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslidedown.vi v8, v9, 1
; NO-ZVBB-NEXT:    vslideup.vi v8, v9, 1
; NO-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; NO-ZVBB-NEXT:    vslideup.vi v8, v10, 2
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4i32_2:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; ZVBB-NEXT:    vror.vi v10, v8, 32
; ZVBB-NEXT:    vror.vi v8, v9, 32
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vslideup.vi v8, v10, 2
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %res
}

define <8 x i32> @reverse_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: reverse_v8i32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v9, v9, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v9
; CHECK-NEXT:    vrgather.vv v12, v11, v9
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    vrsub.vi v14, v8, 3
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v10, v14, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %res
}

define <16 x i32> @reverse_v16i32_2(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: reverse_v16i32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv2r.v v12, v10
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v14, v10, a1
; CHECK-NEXT:    vrgather.vv v11, v8, v14
; CHECK-NEXT:    vrgather.vv v10, v9, v14
; CHECK-NEXT:    vrgather.vv v8, v9, v14
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v14
; CHECK-NEXT:    li a1, 255
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vrsub.vi v16, v14, 7
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i32> %res
}
define <32 x i32> @reverse_v32i32_2(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: reverse_v32i32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv4r.v v16, v12
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v20, v12, a1
; CHECK-NEXT:    vrgather.vv v15, v8, v20
; CHECK-NEXT:    vrgather.vv v14, v9, v20
; CHECK-NEXT:    vrgather.vv v13, v10, v20
; CHECK-NEXT:    vrgather.vv v12, v11, v20
; CHECK-NEXT:    lui a1, 16
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    vrgather.vv v8, v9, v20
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vid.v v20
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vrsub.vi v24, v20, 15
; CHECK-NEXT:    vmv2r.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v16, v24, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i32> %res
}
define <4 x i64> @reverse_v4i64_2(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: reverse_v4i64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 1
; CHECK-NEXT:    vslideup.vi v10, v8, 1
; CHECK-NEXT:    vslidedown.vi v8, v9, 1
; CHECK-NEXT:    vslideup.vi v8, v9, 1
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 2
; CHECK-NEXT:    ret
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i64> %res
}

define <8 x i64> @reverse_v8i64_2(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: reverse_v8i64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv2r.v v12, v10
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v14, v10, a1
; CHECK-NEXT:    vrgather.vv v11, v8, v14
; CHECK-NEXT:    vrgather.vv v10, v9, v14
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v15
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vv v8, v9, v14
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vrsub.vi v16, v15, 3
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i64> %res
}
define <4 x half> @reverse_v4f16_2(<2 x half> %a, <2 x half> %b) {
; NO-ZVBB-LABEL: reverse_v4f16_2:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslidedown.vi v8, v9, 1
; NO-ZVBB-NEXT:    vslideup.vi v8, v9, 1
; NO-ZVBB-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; NO-ZVBB-NEXT:    vslideup.vi v8, v10, 2
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4f16_2:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVBB-NEXT:    vror.vi v10, v8, 16
; ZVBB-NEXT:    vror.vi v8, v9, 16
; ZVBB-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVBB-NEXT:    vslideup.vi v8, v10, 2
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x half> %a, <2 x half> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x half> %res
}

define <8 x half> @reverse_v8f16_2(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: reverse_v8f16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vid.v v11
; CHECK-NEXT:    vrsub.vi v12, v11, 7
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    vrsub.vi v8, v11, 3
; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x half> %res
}

define <16 x half> @reverse_v16f16_2(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: reverse_v16f16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v9, v9, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v9
; CHECK-NEXT:    vrgather.vv v12, v11, v9
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a1, 255
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vrsub.vi v14, v8, 7
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    vrgather.vv v8, v10, v14, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <8 x half> %a, <8 x half> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x half> %res
}
define <32 x half> @reverse_v32f16_2(<16 x half> %a) {
; CHECK-LABEL: reverse_v32f16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v12, v10, a1
; CHECK-NEXT:    vrgather.vv v11, v8, v12
; CHECK-NEXT:    vrgather.vv v10, v9, v12
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    vrgather.vv v8, v9, v12
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vmv.v.v v9, v8
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    ret
  %res = shufflevector <16 x half> %a, <16 x half> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x half> %res
}
define <4 x float> @reverse_v4f32_2(<2 x float> %a, <2 x float> %b) {
; NO-ZVBB-LABEL: reverse_v4f32_2:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslidedown.vi v8, v9, 1
; NO-ZVBB-NEXT:    vslideup.vi v8, v9, 1
; NO-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; NO-ZVBB-NEXT:    vslideup.vi v8, v10, 2
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4f32_2:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; ZVBB-NEXT:    vror.vi v10, v8, 32
; ZVBB-NEXT:    vror.vi v8, v9, 32
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vslideup.vi v8, v10, 2
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %res
}

define <8 x float> @reverse_v8f32_2(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: reverse_v8f32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v9, v9, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v9
; CHECK-NEXT:    vrgather.vv v12, v11, v9
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    vrsub.vi v14, v8, 3
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v10, v14, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x float> %res
}

define <16 x float> @reverse_v16f32_2(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: reverse_v16f32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv2r.v v12, v10
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v14, v10, a1
; CHECK-NEXT:    vrgather.vv v11, v8, v14
; CHECK-NEXT:    vrgather.vv v10, v9, v14
; CHECK-NEXT:    vrgather.vv v8, v9, v14
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v14
; CHECK-NEXT:    li a1, 255
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vrsub.vi v16, v14, 7
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x float> %res
}
define <4 x double> @reverse_v4f64_2(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: reverse_v4f64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 1
; CHECK-NEXT:    vslideup.vi v10, v8, 1
; CHECK-NEXT:    vslidedown.vi v8, v9, 1
; CHECK-NEXT:    vslideup.vi v8, v9, 1
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 2
; CHECK-NEXT:    ret
  %res = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %res
}

define <8 x double> @reverse_v8f64_2(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: reverse_v8f64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv2r.v v12, v10
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v14, v10, a1
; CHECK-NEXT:    vrgather.vv v11, v8, v14
; CHECK-NEXT:    vrgather.vv v10, v9, v14
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v15
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vv v8, v9, v14
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vrsub.vi v16, v15, 3
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x double> %res
}
; There is no corresponding v1i256 type, so make sure we don't crash if we try
; to lower via lowerBitreverseShuffle.
define <256 x i1> @reverse_v256i1(<256 x i1> %a) vscale_range(16, 1024) {
; CHECK-LABEL: reverse_v256i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 256
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    addi a2, a1, -1
; CHECK-NEXT:    slli a1, a1, 1
; CHECK-NEXT:    vmerge.vim v10, v10, 1, v0
; CHECK-NEXT:    vsetvli a3, zero, e16, m2, ta, ma
; CHECK-NEXT:    vrsub.vx v8, v8, a2
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v13, v10, v8
; CHECK-NEXT:    vrgatherei16.vv v12, v11, v8
; CHECK-NEXT:    addi a1, a1, -256
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a1
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret
  %res = shufflevector <256 x i1> %a, <256 x i1> poison, <256 x i32> <i32 255, i32 254, i32 253, i32 252, i32 251, i32 250, i32 249, i32 248, i32 247, i32 246, i32 245, i32 244, i32 243, i32 242, i32 241, i32 240, i32 239, i32 238, i32 237, i32 236, i32 235, i32 234, i32 233, i32 232, i32 231, i32 230, i32 229, i32 228, i32 227, i32 226, i32 225, i32 224, i32 223, i32 222, i32 221, i32 220, i32 219, i32 218, i32 217, i32 216, i32 215, i32 214, i32 213, i32 212, i32 211, i32 210, i32 209, i32 208, i32 207, i32 206, i32 205, i32 204, i32 203, i32 202, i32 201, i32 200, i32 199, i32 198, i32 197, i32 196, i32 195, i32 194, i32 193, i32 192, i32 191, i32 190, i32 189, i32 188, i32 187, i32 186, i32 185, i32 184, i32 183, i32 182, i32 181, i32 180, i32 179, i32 178, i32 177, i32 176, i32 175, i32 174, i32 173, i32 172, i32 171, i32 170, i32 169, i32 168, i32 167, i32 166, i32 165, i32 164, i32 163, i32 162, i32 161, i32 160, i32 159, i32 158, i32 157, i32 156, i32 155, i32 154, i32 153, i32 152, i32 151, i32 150, i32 149, i32 148, i32 147, i32 146, i32 145, i32 144, i32 143, i32 142, i32 141, i32 140, i32 139, i32 138, i32 137, i32 136, i32 135, i32 134, i32 133, i32 132, i32 131, i32 130, i32 129, i32 128, i32 127, i32 126, i32 125, i32 124, i32 123, i32 122, i32 121, i32 120, i32 119, i32 118, i32 117, i32 116, i32 115, i32 114, i32 113, i32 112, i32 111, i32 110, i32 109, i32 108, i32 107, i32 106, i32 105, i32 104, i32 103, i32 102, i32 101, i32 100, i32 99, i32 98, i32 97, i32 96, i32 95, i32 94, i32 93, i32 92, i32 91, i32 90, i32 89, i32 88, i32 87, i32 86, i32 85, i32 84, i32 83, i32 82, i32 81, i32 80, i32 79, i32 78, i32 77, i32 76, i32 75, i32 74, i32 73, i32 72, i32 71, i32 70, i32 69, i32 68, i32 67, i32 66, i32 65, i32 64, i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <256 x i1> %res
}
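; With VLEN pinned by vscale_range, register boundaries are known exactly, so
; the reverse decomposes into per-register vrgathers plus a swap of the
; registers in the group; no vlenb read or trailing vslidedown is needed.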
define <8 x i32> @reverse_v8i32_exact_vlen_128(<8 x i32> %a) vscale_range(2, 2) {
; CHECK-LABEL: reverse_v8i32_exact_vlen_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vi v12, v10, 3
; CHECK-NEXT:    vrgather.vv v11, v8, v12
; CHECK-NEXT:    vrgather.vv v10, v9, v12
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %res
}

define <16 x i32> @reverse_v16i32_exact_vlen_256(<16 x i32> %a) vscale_range(4, 4) {
; CHECK-LABEL: reverse_v16i32_exact_vlen_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vi v12, v10, 7
; CHECK-NEXT:    vrgather.vv v11, v8, v12
; CHECK-NEXT:    vrgather.vv v10, v9, v12
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i32> %a, <16 x i32> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i32> %res
}