1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+f,+zfh,+zvfh,+d -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
3 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+f,+zfh,+zvfh,+d -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
; Reverse a <2 x i8> vector: the mask <1, 0> swaps the two elements of %a.
; The assertions below are autogenerated; regenerate them rather than editing by hand.
5 define <2 x i8> @v2i8(<2 x i8> %a) {
8 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
9 ; CHECK-NEXT: vslidedown.vi v9, v8, 1
10 ; CHECK-NEXT: vslideup.vi v9, v8, 1
11 ; CHECK-NEXT: vmv1r.v v8, v9
13 %v2i8 = shufflevector <2 x i8> %a, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
; Reverse the concatenation of two <2 x i8> vectors into a <4 x i8> result:
; the mask <3, 2, 1, 0> selects %b's elements in descending order, then %a's.
17 define <4 x i8> @v2i8_2(<2 x i8> %a, <2 x i8> %b) {
18 ; CHECK-LABEL: v2i8_2:
20 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
21 ; CHECK-NEXT: vslidedown.vi v10, v8, 1
22 ; CHECK-NEXT: vslideup.vi v10, v8, 1
23 ; CHECK-NEXT: vslidedown.vi v8, v9, 1
24 ; CHECK-NEXT: vslideup.vi v8, v9, 1
25 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
26 ; CHECK-NEXT: vslideup.vi v8, v10, 2
28 %v4i8 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Reverse a <4 x i8> vector: the mask <3, 2, 1, 0> selects the elements of %a
; in descending index order.
32 define <4 x i8> @v4i8(<4 x i8> %a) {
35 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
36 ; CHECK-NEXT: vid.v v9
37 ; CHECK-NEXT: vrsub.vi v10, v9, 3
38 ; CHECK-NEXT: vrgather.vv v9, v8, v10
39 ; CHECK-NEXT: vmv1r.v v8, v9
41 %v4i8 = shufflevector <4 x i8> %a, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Reverse the concatenation of two <4 x i8> vectors into an <8 x i8> result:
; the mask <7, ..., 0> selects %b's elements in descending order, then %a's.
45 define <8 x i8> @v4i8_2(<4 x i8> %a, <4 x i8> %b) {
46 ; CHECK-LABEL: v4i8_2:
48 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
49 ; CHECK-NEXT: vid.v v11
50 ; CHECK-NEXT: vrsub.vi v12, v11, 7
51 ; CHECK-NEXT: vrgather.vv v10, v8, v12
52 ; CHECK-NEXT: vmv.v.i v0, 15
53 ; CHECK-NEXT: vrsub.vi v8, v11, 3
54 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
55 ; CHECK-NEXT: vmv1r.v v8, v10
57 %v8i8 = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Reverse an <8 x i8> vector: the mask <7, ..., 0> selects the elements of %a
; in descending index order.
61 define <8 x i8> @v8i8(<8 x i8> %a) {
64 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
65 ; CHECK-NEXT: vid.v v9
66 ; CHECK-NEXT: vrsub.vi v10, v9, 7
67 ; CHECK-NEXT: vrgather.vv v9, v8, v10
68 ; CHECK-NEXT: vmv1r.v v8, v9
70 %v8i8 = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Reverse the concatenation of two <8 x i8> vectors into a <16 x i8> result:
; the mask <15, ..., 0> selects %b's elements in descending order, then %a's.
74 define <16 x i8> @v8i8_2(<8 x i8> %a, <8 x i8> %b) {
75 ; CHECK-LABEL: v8i8_2:
77 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
78 ; CHECK-NEXT: vid.v v11
79 ; CHECK-NEXT: vrsub.vi v12, v11, 15
80 ; CHECK-NEXT: vrgather.vv v10, v8, v12
81 ; CHECK-NEXT: vrsub.vi v8, v11, 7
82 ; CHECK-NEXT: li a0, 255
83 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
84 ; CHECK-NEXT: vmv.s.x v0, a0
85 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
86 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
87 ; CHECK-NEXT: vmv.v.v v8, v10
89 %v16i8 = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Reverse a <16 x i8> vector: the mask <15, ..., 0> selects the elements of %a
; in descending index order.
93 define <16 x i8> @v16i8(<16 x i8> %a) {
96 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
97 ; CHECK-NEXT: vid.v v9
98 ; CHECK-NEXT: vrsub.vi v10, v9, 15
99 ; CHECK-NEXT: vrgather.vv v9, v8, v10
100 ; CHECK-NEXT: vmv.v.v v8, v9
102 %v16i8 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Reverse the concatenation of two <16 x i8> vectors into a <32 x i8> result:
; the mask <31, ..., 0> selects %b's elements in descending order, then %a's.
; The expected code loads one gather index vector from a constant-pool entry.
106 define <32 x i8> @v16i8_2(<16 x i8> %a, <16 x i8> %b) {
107 ; CHECK-LABEL: v16i8_2:
109 ; CHECK-NEXT: lui a0, %hi(.LCPI7_0)
110 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI7_0)
111 ; CHECK-NEXT: li a1, 32
112 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
113 ; CHECK-NEXT: vle8.v v12, (a0)
114 ; CHECK-NEXT: vmv1r.v v14, v9
115 ; CHECK-NEXT: vrgather.vv v10, v8, v12
116 ; CHECK-NEXT: vid.v v8
117 ; CHECK-NEXT: vrsub.vi v8, v8, 15
118 ; CHECK-NEXT: lui a0, 16
119 ; CHECK-NEXT: addi a0, a0, -1
120 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
121 ; CHECK-NEXT: vmv.s.x v0, a0
122 ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, mu
123 ; CHECK-NEXT: vrgather.vv v10, v14, v8, v0.t
124 ; CHECK-NEXT: vmv.v.v v8, v10
126 %v32i8 = shufflevector <16 x i8> %a, <16 x i8> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Reverse a <2 x i16> vector: the mask <1, 0> swaps the two elements of %a.
130 define <2 x i16> @v2i16(<2 x i16> %a) {
131 ; CHECK-LABEL: v2i16:
133 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
134 ; CHECK-NEXT: vslidedown.vi v9, v8, 1
135 ; CHECK-NEXT: vslideup.vi v9, v8, 1
136 ; CHECK-NEXT: vmv1r.v v8, v9
138 %v2i16 = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
; Reverse the concatenation of two <2 x i16> vectors into a <4 x i16> result:
; the mask <3, 2, 1, 0> selects %b's elements in descending order, then %a's.
142 define <4 x i16> @v2i16_2(<2 x i16> %a, <2 x i16> %b) {
143 ; CHECK-LABEL: v2i16_2:
145 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
146 ; CHECK-NEXT: vslidedown.vi v10, v8, 1
147 ; CHECK-NEXT: vslideup.vi v10, v8, 1
148 ; CHECK-NEXT: vslidedown.vi v8, v9, 1
149 ; CHECK-NEXT: vslideup.vi v8, v9, 1
150 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
151 ; CHECK-NEXT: vslideup.vi v8, v10, 2
153 %v4i16 = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Reverse a <4 x i16> vector: the mask <3, 2, 1, 0> selects the elements of %a
; in descending index order.
157 define <4 x i16> @v4i16(<4 x i16> %a) {
158 ; CHECK-LABEL: v4i16:
160 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
161 ; CHECK-NEXT: vid.v v9
162 ; CHECK-NEXT: vrsub.vi v10, v9, 3
163 ; CHECK-NEXT: vrgather.vv v9, v8, v10
164 ; CHECK-NEXT: vmv1r.v v8, v9
166 %v4i16 = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Reverse the concatenation of two <4 x i16> vectors into an <8 x i16> result:
; the mask <7, ..., 0> selects %b's elements in descending order, then %a's.
170 define <8 x i16> @v4i16_2(<4 x i16> %a, <4 x i16> %b) {
171 ; CHECK-LABEL: v4i16_2:
173 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
174 ; CHECK-NEXT: vid.v v11
175 ; CHECK-NEXT: vrsub.vi v12, v11, 7
176 ; CHECK-NEXT: vrgather.vv v10, v8, v12
177 ; CHECK-NEXT: vmv.v.i v0, 15
178 ; CHECK-NEXT: vrsub.vi v8, v11, 3
179 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
180 ; CHECK-NEXT: vmv.v.v v8, v10
182 %v8i16 = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Reverse an <8 x i16> vector: the mask <7, ..., 0> selects the elements of %a
; in descending index order.
186 define <8 x i16> @v8i16(<8 x i16> %a) {
187 ; CHECK-LABEL: v8i16:
189 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
190 ; CHECK-NEXT: vid.v v9
191 ; CHECK-NEXT: vrsub.vi v10, v9, 7
192 ; CHECK-NEXT: vrgather.vv v9, v8, v10
193 ; CHECK-NEXT: vmv.v.v v8, v9
195 %v8i16 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Reverse the concatenation of two <8 x i16> vectors into a <16 x i16> result:
; the mask <15, ..., 0> selects %b's elements in descending order, then %a's.
199 define <16 x i16> @v8i16_2(<8 x i16> %a, <8 x i16> %b) {
200 ; CHECK-LABEL: v8i16_2:
202 ; CHECK-NEXT: vmv1r.v v12, v9
203 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
204 ; CHECK-NEXT: vid.v v14
205 ; CHECK-NEXT: vrsub.vi v16, v14, 15
206 ; CHECK-NEXT: vrgather.vv v10, v8, v16
207 ; CHECK-NEXT: vrsub.vi v8, v14, 7
208 ; CHECK-NEXT: li a0, 255
209 ; CHECK-NEXT: vmv.s.x v0, a0
210 ; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t
211 ; CHECK-NEXT: vmv.v.v v8, v10
213 %v16i16 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
214 ret <16 x i16> %v16i16
; Reverse a <16 x i16> vector: the mask <15, ..., 0> selects the elements of %a
; in descending index order.
217 define <16 x i16> @v16i16(<16 x i16> %a) {
218 ; CHECK-LABEL: v16i16:
220 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
221 ; CHECK-NEXT: vid.v v10
222 ; CHECK-NEXT: vrsub.vi v12, v10, 15
223 ; CHECK-NEXT: vrgather.vv v10, v8, v12
224 ; CHECK-NEXT: vmv.v.v v8, v10
226 %v16i16 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
227 ret <16 x i16> %v16i16
; Reverse the concatenation of two <16 x i16> vectors into a <32 x i16> result:
; the mask <31, ..., 0> selects %b's elements in descending order, then %a's.
; The expected code loads one gather index vector from a constant-pool entry.
230 define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) {
231 ; CHECK-LABEL: v16i16_2:
233 ; CHECK-NEXT: lui a0, %hi(.LCPI15_0)
234 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI15_0)
235 ; CHECK-NEXT: li a1, 32
236 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
237 ; CHECK-NEXT: vle16.v v20, (a0)
238 ; CHECK-NEXT: vmv2r.v v16, v10
239 ; CHECK-NEXT: vmv2r.v v12, v8
240 ; CHECK-NEXT: vrgather.vv v8, v12, v20
241 ; CHECK-NEXT: vid.v v12
242 ; CHECK-NEXT: vrsub.vi v12, v12, 15
243 ; CHECK-NEXT: lui a0, 16
244 ; CHECK-NEXT: addi a0, a0, -1
245 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
246 ; CHECK-NEXT: vmv.s.x v0, a0
247 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
248 ; CHECK-NEXT: vrgather.vv v8, v16, v12, v0.t
250 %v32i16 = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
251 ret <32 x i16> %v32i16
; Reverse a <2 x i32> vector: the mask <1, 0> swaps the two elements of %a.
254 define <2 x i32> @v2i32(<2 x i32> %a) {
255 ; CHECK-LABEL: v2i32:
257 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
258 ; CHECK-NEXT: vslidedown.vi v9, v8, 1
259 ; CHECK-NEXT: vslideup.vi v9, v8, 1
260 ; CHECK-NEXT: vmv1r.v v8, v9
262 %v2i32 = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
; Reverse the concatenation of two <2 x i32> vectors into a <4 x i32> result:
; the mask <3, 2, 1, 0> selects %b's elements in descending order, then %a's.
266 define <4 x i32> @v2i32_2(<2 x i32> %a, < 2 x i32> %b) {
267 ; CHECK-LABEL: v2i32_2:
269 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
270 ; CHECK-NEXT: vslidedown.vi v10, v8, 1
271 ; CHECK-NEXT: vslideup.vi v10, v8, 1
272 ; CHECK-NEXT: vslidedown.vi v8, v9, 1
273 ; CHECK-NEXT: vslideup.vi v8, v9, 1
274 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
275 ; CHECK-NEXT: vslideup.vi v8, v10, 2
277 %v4i32 = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Reverse a <4 x i32> vector: the mask <3, 2, 1, 0> selects the elements of %a
; in descending index order.
281 define <4 x i32> @v4i32(<4 x i32> %a) {
282 ; CHECK-LABEL: v4i32:
284 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
285 ; CHECK-NEXT: vid.v v9
286 ; CHECK-NEXT: vrsub.vi v10, v9, 3
287 ; CHECK-NEXT: vrgather.vv v9, v8, v10
288 ; CHECK-NEXT: vmv.v.v v8, v9
290 %v4i32 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Reverse the concatenation of two <4 x i32> vectors into an <8 x i32> result:
; the mask <7, ..., 0> selects %b's elements in descending order, then %a's.
; The expected code builds its indices at e16 and gathers with vrgatherei16.vv.
294 define <8 x i32> @v4i32_2(<4 x i32> %a, <4 x i32> %b) {
295 ; CHECK-LABEL: v4i32_2:
297 ; CHECK-NEXT: vmv1r.v v12, v9
298 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
299 ; CHECK-NEXT: vid.v v9
300 ; CHECK-NEXT: vrsub.vi v13, v9, 7
301 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
302 ; CHECK-NEXT: vrgatherei16.vv v10, v8, v13
303 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
304 ; CHECK-NEXT: vrsub.vi v8, v9, 3
305 ; CHECK-NEXT: vmv.v.i v0, 15
306 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
307 ; CHECK-NEXT: vrgatherei16.vv v10, v12, v8, v0.t
308 ; CHECK-NEXT: vmv.v.v v8, v10
310 %v8i32 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Reverse an <8 x i32> vector: the mask <7, ..., 0> selects the elements of %a
; in descending index order.
314 define <8 x i32> @v8i32(<8 x i32> %a) {
315 ; CHECK-LABEL: v8i32:
317 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
318 ; CHECK-NEXT: vid.v v10
319 ; CHECK-NEXT: vrsub.vi v12, v10, 7
320 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
321 ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
322 ; CHECK-NEXT: vmv.v.v v8, v10
324 %v8i32 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Reverse the concatenation of two <8 x i32> vectors into a <16 x i32> result:
; the mask <15, ..., 0> selects %b's elements in descending order, then %a's.
328 define <16 x i32> @v8i32_2(<8 x i32> %a, <8 x i32> %b) {
329 ; CHECK-LABEL: v8i32_2:
331 ; CHECK-NEXT: vmv2r.v v16, v10
332 ; CHECK-NEXT: vmv2r.v v12, v8
333 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
334 ; CHECK-NEXT: vid.v v14
335 ; CHECK-NEXT: vrsub.vi v18, v14, 15
336 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
337 ; CHECK-NEXT: vrgatherei16.vv v8, v12, v18
338 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
339 ; CHECK-NEXT: vrsub.vi v12, v14, 7
340 ; CHECK-NEXT: li a0, 255
341 ; CHECK-NEXT: vmv.s.x v0, a0
342 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
343 ; CHECK-NEXT: vrgatherei16.vv v8, v16, v12, v0.t
345 %v16i32 = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
346 ret <16 x i32> %v16i32
; Reverse a <16 x i32> vector: the mask <15, ..., 0> selects the elements of %a
; in descending index order.
349 define <16 x i32> @v16i32(<16 x i32> %a) {
350 ; CHECK-LABEL: v16i32:
352 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
353 ; CHECK-NEXT: vid.v v12
354 ; CHECK-NEXT: vrsub.vi v16, v12, 15
355 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
356 ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
357 ; CHECK-NEXT: vmv.v.v v8, v12
359 %v16i32 = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
360 ret <16 x i32> %v16i32
; Reverse the concatenation of two <16 x i32> vectors into a <32 x i32> result:
; the mask <31, ..., 0> selects %b's elements in descending order, then %a's.
; The expected code loads one gather index vector from a constant-pool entry.
363 define <32 x i32> @v16i32_2(<16 x i32> %a, <16 x i32> %b) {
364 ; CHECK-LABEL: v16i32_2:
366 ; CHECK-NEXT: lui a0, %hi(.LCPI23_0)
367 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI23_0)
368 ; CHECK-NEXT: li a1, 32
369 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
370 ; CHECK-NEXT: vle16.v v20, (a0)
371 ; CHECK-NEXT: vmv4r.v v24, v12
372 ; CHECK-NEXT: vmv4r.v v16, v8
373 ; CHECK-NEXT: vrgatherei16.vv v8, v16, v20
374 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
375 ; CHECK-NEXT: vid.v v16
376 ; CHECK-NEXT: vrsub.vi v16, v16, 15
377 ; CHECK-NEXT: lui a0, 16
378 ; CHECK-NEXT: addi a0, a0, -1
379 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
380 ; CHECK-NEXT: vmv.s.x v0, a0
381 ; CHECK-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
383 %v32i32 = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
384 ret <32 x i32> %v32i32
; Reverse a <2 x i64> vector: the mask <1, 0> swaps the two elements of %a.
387 define <2 x i64> @v2i64(<2 x i64> %a) {
388 ; CHECK-LABEL: v2i64:
390 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
391 ; CHECK-NEXT: vslidedown.vi v9, v8, 1
392 ; CHECK-NEXT: vslideup.vi v9, v8, 1
393 ; CHECK-NEXT: vmv.v.v v8, v9
395 %v2i64 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
; Reverse the concatenation of two <2 x i64> vectors into a <4 x i64> result:
; the mask <3, 2, 1, 0> selects %b's elements in descending order, then %a's.
399 define <4 x i64> @v2i64_2(<2 x i64> %a, < 2 x i64> %b) {
400 ; CHECK-LABEL: v2i64_2:
402 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
403 ; CHECK-NEXT: vslidedown.vi v10, v8, 1
404 ; CHECK-NEXT: vslideup.vi v10, v8, 1
405 ; CHECK-NEXT: vslidedown.vi v8, v9, 1
406 ; CHECK-NEXT: vslideup.vi v8, v9, 1
407 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
408 ; CHECK-NEXT: vslideup.vi v8, v10, 2
410 %v4i64 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Reverse a <4 x i64> vector: the mask <3, 2, 1, 0> selects the elements of %a
; in descending index order.
414 define <4 x i64> @v4i64(<4 x i64> %a) {
415 ; CHECK-LABEL: v4i64:
417 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
418 ; CHECK-NEXT: vid.v v10
419 ; CHECK-NEXT: vrsub.vi v12, v10, 3
420 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
421 ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
422 ; CHECK-NEXT: vmv.v.v v8, v10
424 %v4i64 = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Reverse the concatenation of two <4 x i64> vectors into an <8 x i64> result:
; the mask <7, ..., 0> selects %b's elements in descending order, then %a's.
428 define <8 x i64> @v4i64_2(<4 x i64> %a, <4 x i64> %b) {
429 ; CHECK-LABEL: v4i64_2:
431 ; CHECK-NEXT: vmv2r.v v16, v10
432 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
433 ; CHECK-NEXT: vid.v v10
434 ; CHECK-NEXT: vrsub.vi v11, v10, 7
435 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
436 ; CHECK-NEXT: vrgatherei16.vv v12, v8, v11
437 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
438 ; CHECK-NEXT: vrsub.vi v8, v10, 3
439 ; CHECK-NEXT: vmv.v.i v0, 15
440 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
441 ; CHECK-NEXT: vrgatherei16.vv v12, v16, v8, v0.t
442 ; CHECK-NEXT: vmv.v.v v8, v12
444 %v8i64 = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Reverse a <2 x half> vector: the mask <1, 0> swaps the two elements of %a.
448 define <2 x half> @v2f16(<2 x half> %a) {
449 ; CHECK-LABEL: v2f16:
451 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
452 ; CHECK-NEXT: vslidedown.vi v9, v8, 1
453 ; CHECK-NEXT: vslideup.vi v9, v8, 1
454 ; CHECK-NEXT: vmv1r.v v8, v9
456 %v2f16 = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0>
457 ret <2 x half> %v2f16
; Reverse the concatenation of two <2 x half> vectors into a <4 x half> result:
; the mask <3, 2, 1, 0> selects %b's elements in descending order, then %a's.
460 define <4 x half> @v2f16_2(<2 x half> %a, <2 x half> %b) {
461 ; CHECK-LABEL: v2f16_2:
463 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
464 ; CHECK-NEXT: vslidedown.vi v10, v8, 1
465 ; CHECK-NEXT: vslideup.vi v10, v8, 1
466 ; CHECK-NEXT: vslidedown.vi v8, v9, 1
467 ; CHECK-NEXT: vslideup.vi v8, v9, 1
468 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
469 ; CHECK-NEXT: vslideup.vi v8, v10, 2
471 %v4f16 = shufflevector <2 x half> %a, <2 x half> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
472 ret <4 x half> %v4f16
; Reverse a <4 x half> vector: the mask <3, 2, 1, 0> selects the elements of %a
; in descending index order.
475 define <4 x half> @v4f16(<4 x half> %a) {
476 ; CHECK-LABEL: v4f16:
478 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
479 ; CHECK-NEXT: vid.v v9
480 ; CHECK-NEXT: vrsub.vi v10, v9, 3
481 ; CHECK-NEXT: vrgather.vv v9, v8, v10
482 ; CHECK-NEXT: vmv1r.v v8, v9
484 %v4f16 = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
485 ret <4 x half> %v4f16
; Reverse the concatenation of two <4 x half> vectors into an <8 x half> result:
; the mask <7, ..., 0> selects %b's elements in descending order, then %a's.
488 define <8 x half> @v4f16_2(<4 x half> %a, <4 x half> %b) {
489 ; CHECK-LABEL: v4f16_2:
491 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
492 ; CHECK-NEXT: vid.v v11
493 ; CHECK-NEXT: vrsub.vi v12, v11, 7
494 ; CHECK-NEXT: vrgather.vv v10, v8, v12
495 ; CHECK-NEXT: vmv.v.i v0, 15
496 ; CHECK-NEXT: vrsub.vi v8, v11, 3
497 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
498 ; CHECK-NEXT: vmv.v.v v8, v10
500 %v8f16 = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
501 ret <8 x half> %v8f16
; Reverse an <8 x half> vector: the mask <7, ..., 0> selects the elements of %a
; in descending index order.
504 define <8 x half> @v8f16(<8 x half> %a) {
505 ; CHECK-LABEL: v8f16:
507 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
508 ; CHECK-NEXT: vid.v v9
509 ; CHECK-NEXT: vrsub.vi v10, v9, 7
510 ; CHECK-NEXT: vrgather.vv v9, v8, v10
511 ; CHECK-NEXT: vmv.v.v v8, v9
513 %v8f16 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
514 ret <8 x half> %v8f16
; Reverse the concatenation of two <8 x half> vectors into a <16 x half> result:
; the mask <15, ..., 0> selects %b's elements in descending order, then %a's.
517 define <16 x half> @v8f16_2(<8 x half> %a, <8 x half> %b) {
518 ; CHECK-LABEL: v8f16_2:
520 ; CHECK-NEXT: vmv1r.v v12, v9
521 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
522 ; CHECK-NEXT: vid.v v14
523 ; CHECK-NEXT: vrsub.vi v16, v14, 15
524 ; CHECK-NEXT: vrgather.vv v10, v8, v16
525 ; CHECK-NEXT: vrsub.vi v8, v14, 7
526 ; CHECK-NEXT: li a0, 255
527 ; CHECK-NEXT: vmv.s.x v0, a0
528 ; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t
529 ; CHECK-NEXT: vmv.v.v v8, v10
531 %v16f16 = shufflevector <8 x half> %a, <8 x half> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
532 ret <16 x half> %v16f16
; Reverse a <16 x half> vector: the mask <15, ..., 0> selects the elements of %a
; in descending index order.
535 define <16 x half> @v16f16(<16 x half> %a) {
536 ; CHECK-LABEL: v16f16:
538 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
539 ; CHECK-NEXT: vid.v v10
540 ; CHECK-NEXT: vrsub.vi v12, v10, 15
541 ; CHECK-NEXT: vrgather.vv v10, v8, v12
542 ; CHECK-NEXT: vmv.v.v v8, v10
544 %v16f16 = shufflevector <16 x half> %a, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
545 ret <16 x half> %v16f16
; Widening reverse of a single <16 x half> vector into a <32 x half> result.
; The second shuffle operand is undef, so mask indices 31..16 (the first 16
; result lanes) select undef lanes; only the last 16 lanes hold %a reversed.
; NOTE(review): every other *_2 test visible in this file takes a second %b
; operand and reverses the concatenation of both inputs. Confirm that the
; one-operand form here is intentional; switching to two operands would
; require regenerating the assertions below.
548 define <32 x half> @v16f16_2(<16 x half> %a) {
549 ; CHECK-LABEL: v16f16_2:
551 ; CHECK-NEXT: lui a0, %hi(.LCPI35_0)
552 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI35_0)
553 ; CHECK-NEXT: li a1, 32
554 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
555 ; CHECK-NEXT: vle16.v v16, (a0)
556 ; CHECK-NEXT: vrgather.vv v12, v8, v16
557 ; CHECK-NEXT: vmv.v.v v8, v12
559 %v32f16 = shufflevector <16 x half> %a, <16 x half> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
560 ret <32 x half> %v32f16
; Reverse a <2 x float> vector: the mask <1, 0> swaps the two elements of %a.
563 define <2 x float> @v2f32(<2 x float> %a) {
564 ; CHECK-LABEL: v2f32:
566 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
567 ; CHECK-NEXT: vslidedown.vi v9, v8, 1
568 ; CHECK-NEXT: vslideup.vi v9, v8, 1
569 ; CHECK-NEXT: vmv1r.v v8, v9
571 %v2f32 = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 0>
572 ret <2 x float> %v2f32
; Reverse the concatenation of two <2 x float> vectors into a <4 x float> result:
; the mask <3, 2, 1, 0> selects %b's elements in descending order, then %a's.
575 define <4 x float> @v2f32_2(<2 x float> %a, <2 x float> %b) {
576 ; CHECK-LABEL: v2f32_2:
578 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
579 ; CHECK-NEXT: vslidedown.vi v10, v8, 1
580 ; CHECK-NEXT: vslideup.vi v10, v8, 1
581 ; CHECK-NEXT: vslidedown.vi v8, v9, 1
582 ; CHECK-NEXT: vslideup.vi v8, v9, 1
583 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
584 ; CHECK-NEXT: vslideup.vi v8, v10, 2
586 %v4f32 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
587 ret <4 x float> %v4f32
; Reverse a <4 x float> vector: the mask <3, 2, 1, 0> selects the elements of %a
; in descending index order.
590 define <4 x float> @v4f32(<4 x float> %a) {
591 ; CHECK-LABEL: v4f32:
593 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
594 ; CHECK-NEXT: vid.v v9
595 ; CHECK-NEXT: vrsub.vi v10, v9, 3
596 ; CHECK-NEXT: vrgather.vv v9, v8, v10
597 ; CHECK-NEXT: vmv.v.v v8, v9
599 %v4f32 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
600 ret <4 x float> %v4f32
; Reverse the concatenation of two <4 x float> vectors into an <8 x float> result:
; the mask <7, ..., 0> selects %b's elements in descending order, then %a's.
603 define <8 x float> @v4f32_2(<4 x float> %a, <4 x float> %b) {
604 ; CHECK-LABEL: v4f32_2:
606 ; CHECK-NEXT: vmv1r.v v12, v9
607 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
608 ; CHECK-NEXT: vid.v v9
609 ; CHECK-NEXT: vrsub.vi v13, v9, 7
610 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
611 ; CHECK-NEXT: vrgatherei16.vv v10, v8, v13
612 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
613 ; CHECK-NEXT: vrsub.vi v8, v9, 3
614 ; CHECK-NEXT: vmv.v.i v0, 15
615 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
616 ; CHECK-NEXT: vrgatherei16.vv v10, v12, v8, v0.t
617 ; CHECK-NEXT: vmv.v.v v8, v10
619 %v8f32 = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
620 ret <8 x float> %v8f32
; Reverse an <8 x float> vector: the mask <7, ..., 0> selects the elements of %a
; in descending index order.
623 define <8 x float> @v8f32(<8 x float> %a) {
624 ; CHECK-LABEL: v8f32:
626 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
627 ; CHECK-NEXT: vid.v v10
628 ; CHECK-NEXT: vrsub.vi v12, v10, 7
629 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
630 ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
631 ; CHECK-NEXT: vmv.v.v v8, v10
633 %v8f32 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
634 ret <8 x float> %v8f32
; Reverse the concatenation of two <8 x float> vectors into a <16 x float> result:
; the mask <15, ..., 0> selects %b's elements in descending order, then %a's.
637 define <16 x float> @v8f32_2(<8 x float> %a, <8 x float> %b) {
638 ; CHECK-LABEL: v8f32_2:
640 ; CHECK-NEXT: vmv2r.v v16, v10
641 ; CHECK-NEXT: vmv2r.v v12, v8
642 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
643 ; CHECK-NEXT: vid.v v14
644 ; CHECK-NEXT: vrsub.vi v18, v14, 15
645 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
646 ; CHECK-NEXT: vrgatherei16.vv v8, v12, v18
647 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
648 ; CHECK-NEXT: vrsub.vi v12, v14, 7
649 ; CHECK-NEXT: li a0, 255
650 ; CHECK-NEXT: vmv.s.x v0, a0
651 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
652 ; CHECK-NEXT: vrgatherei16.vv v8, v16, v12, v0.t
654 %v16f32 = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
655 ret <16 x float> %v16f32
; Reverse a <2 x double> vector: the mask <1, 0> swaps the two elements of %a.
658 define <2 x double> @v2f64(<2 x double> %a) {
659 ; CHECK-LABEL: v2f64:
661 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
662 ; CHECK-NEXT: vslidedown.vi v9, v8, 1
663 ; CHECK-NEXT: vslideup.vi v9, v8, 1
664 ; CHECK-NEXT: vmv.v.v v8, v9
666 %v2f64 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
667 ret <2 x double> %v2f64
; Reverse the concatenation of two <2 x double> vectors into a <4 x double> result:
; the mask <3, 2, 1, 0> selects %b's elements in descending order, then %a's.
670 define <4 x double> @v2f64_2(<2 x double> %a, < 2 x double> %b) {
671 ; CHECK-LABEL: v2f64_2:
673 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
674 ; CHECK-NEXT: vslidedown.vi v10, v8, 1
675 ; CHECK-NEXT: vslideup.vi v10, v8, 1
676 ; CHECK-NEXT: vslidedown.vi v8, v9, 1
677 ; CHECK-NEXT: vslideup.vi v8, v9, 1
678 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
679 ; CHECK-NEXT: vslideup.vi v8, v10, 2
681 %v4f64 = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
682 ret <4 x double> %v4f64
; Reverse a <4 x double> vector: the mask <3, 2, 1, 0> selects the elements of %a
; in descending index order.
685 define <4 x double> @v4f64(<4 x double> %a) {
686 ; CHECK-LABEL: v4f64:
688 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
689 ; CHECK-NEXT: vid.v v10
690 ; CHECK-NEXT: vrsub.vi v12, v10, 3
691 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
692 ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
693 ; CHECK-NEXT: vmv.v.v v8, v10
695 %v4f64 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
696 ret <4 x double> %v4f64
; Reverse the concatenation of two <4 x double> vectors into an <8 x double> result:
; the mask <7, ..., 0> selects %b's elements in descending order, then %a's.
699 define <8 x double> @v4f64_2(<4 x double> %a, <4 x double> %b) {
700 ; CHECK-LABEL: v4f64_2:
702 ; CHECK-NEXT: vmv2r.v v16, v10
703 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
704 ; CHECK-NEXT: vid.v v10
705 ; CHECK-NEXT: vrsub.vi v11, v10, 7
706 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
707 ; CHECK-NEXT: vrgatherei16.vv v12, v8, v11
708 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
709 ; CHECK-NEXT: vrsub.vi v8, v10, 3
710 ; CHECK-NEXT: vmv.v.i v0, 15
711 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
712 ; CHECK-NEXT: vrgatherei16.vv v12, v16, v8, v0.t
713 ; CHECK-NEXT: vmv.v.v v8, v12
715 %v8f64 = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
716 ret <8 x double> %v8f64
; Reverse a <32 x i8> vector: the mask <31, ..., 0> selects the elements of %a
; in descending index order. The expected code loads the gather index vector
; from a constant-pool entry.
719 define <32 x i8> @v32i8(<32 x i8> %a) {
720 ; CHECK-LABEL: v32i8:
722 ; CHECK-NEXT: lui a0, %hi(.LCPI46_0)
723 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0)
724 ; CHECK-NEXT: li a1, 32
725 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
726 ; CHECK-NEXT: vle8.v v12, (a0)
727 ; CHECK-NEXT: vrgather.vv v10, v8, v12
728 ; CHECK-NEXT: vmv.v.v v8, v10
730 %v32i8 = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
734 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: