; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

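; Two-source shuffles where every lane takes a whole element from one source
; lower to a vmerge under a constant select mask (here 0b1011 = 11).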
define <4 x i16> @shuffle_v4i16(<4 x i16> %x, <4 x i16> %y) {
; CHECK-LABEL: shuffle_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 11
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i16> %s
}

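; A select mask that doesn't fit vmv.v.i's simm5 (0b11001011 = 203) is
; materialized with li + vmv.s.x instead.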
define <8 x i32> @shuffle_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: shuffle_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 203
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
  ret <8 x i32> %s
}

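; When one source is a constant splat, the merge takes the splat value as an
; immediate (vmerge.vim).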
define <4 x i16> @shuffle_xv_v4i16(<4 x i16> %x) {
; CHECK-LABEL: shuffle_xv_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 9
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmerge.vim v8, v8, 5, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i16> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x i16> %s
}

define <4 x i16> @shuffle_vx_v4i16(<4 x i16> %x) {
; CHECK-LABEL: shuffle_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 6
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmerge.vim v8, v8, 5, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x i16> %s
}

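; Single-source permutes become vrgather.vv; the i8 indices <1, 2, 0, 1> are
; packed into the scalar 0x1000201, written with vmv.s.x, and sign-extended to
; e16 for the gather.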
define <4 x i16> @vrgather_permute_shuffle_vu_v4i16(<4 x i16> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_vu_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 4096
; CHECK-NEXT:    addi a0, a0, 513
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsext.vf2 v10, v9
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
  ret <4 x i16> %s
}

define <4 x i16> @vrgather_permute_shuffle_uv_v4i16(<4 x i16> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_uv_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 4096
; CHECK-NEXT:    addi a0, a0, 513
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsext.vf2 v10, v9
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> poison, <4 x i16> %x, <4 x i32> <i32 5, i32 6, i32 4, i32 5>
  ret <4 x i16> %s
}

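; A genuine two-source gather: an unmasked vrgather.vv from %x, then a masked
; vrgather.vi that pulls %y[1] into lane 3 (v0 = 0b1000).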
define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) {
; CHECK-LABEL: vrgather_shuffle_vv_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI6_0)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vle16.v v11, (a0)
; CHECK-NEXT:    vmv.v.i v0, 8
; CHECK-NEXT:    vrgather.vv v10, v8, v11
; CHECK-NEXT:    vrgather.vi v10, v9, 1, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> %y, <4 x i32> <i32 1, i32 2, i32 0, i32 5>
  ret <4 x i16> %s
}

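; With a splat on one side, the destination is pre-filled via vmv.v.i and only
; the %x lanes are gathered under mask; the indices come from vid.v plus
; vrsub.vi (4 - i) here and vmul.vx (3 * i) below.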
define <4 x i16> @vrgather_shuffle_xv_v4i16(<4 x i16> %x) {
; CHECK-LABEL: vrgather_shuffle_xv_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 4
; CHECK-NEXT:    vmv.v.i v0, 12
; CHECK-NEXT:    vmv.v.i v9, 5
; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i16> %x, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
  ret <4 x i16> %s
}

define <4 x i16> @vrgather_shuffle_vx_v4i16(<4 x i16> %x) {
; CHECK-LABEL: vrgather_shuffle_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    li a0, 3
; CHECK-NEXT:    vmul.vx v10, v9, a0
; CHECK-NEXT:    vmv.v.i v0, 3
; CHECK-NEXT:    vmv.v.i v9, 5
; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
  ret <4 x i16> %s
}

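; e64 gathers use vrgatherei16 so the indices stay i16 (loaded from the
; constant pool with vle16) rather than needing an m4 index vector.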
define <8 x i64> @vrgather_permute_shuffle_vu_v8i64(<8 x i64> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_vu_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_0)
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i64> %x, <8 x i64> poison, <8 x i32> <i32 1, i32 2, i32 0, i32 1, i32 7, i32 6, i32 0, i32 1>
  ret <8 x i64> %s
}

define <8 x i64> @vrgather_permute_shuffle_uv_v8i64(<8 x i64> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_uv_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_0)
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i64> poison, <8 x i64> %x, <8 x i32> <i32 9, i32 10, i32 8, i32 9, i32 15, i32 8, i32 8, i32 11>
  ret <8 x i64> %s
}

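; Two-source e64 gather: RV32 loads both i16 index vectors from the constant
; pool, while RV64 builds them as packed i16s in a GPR with lui/slli/addi and
; splats them with vmv.v.x.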
define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
; RV32-LABEL: vrgather_shuffle_vv_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vmv.v.i v16, 2
; RV32-NEXT:    li a0, 5
; RV32-NEXT:    vslide1down.vx v20, v16, a0
; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI11_0)
; RV32-NEXT:    vle16.v v21, (a0)
; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; RV32-NEXT:    li a0, 164
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    vrgatherei16.vv v16, v8, v21
; RV32-NEXT:    vrgatherei16.vv v16, v12, v20, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_shuffle_vv_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vmv4r.v v16, v8
; RV64-NEXT:    lui a0, 327683
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    addi a0, a0, 1
; RV64-NEXT:    slli a0, a0, 17
; RV64-NEXT:    addi a0, a0, 1
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.v.x v20, a0
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vrgatherei16.vv v8, v16, v20
; RV64-NEXT:    li a0, 164
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    lui a0, 163841
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    addi a0, a0, 1
; RV64-NEXT:    slli a0, a0, 17
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.v.x v16, a0
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
; RV64-NEXT:    ret
  %s = shufflevector <8 x i64> %x, <8 x i64> %y, <8 x i32> <i32 1, i32 2, i32 10, i32 5, i32 1, i32 10, i32 3, i32 13>
  ret <8 x i64> %s
}

define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) {
; RV32-LABEL: vrgather_shuffle_xv_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI12_0)
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV32-NEXT:    vle16.v v16, (a0)
; RV32-NEXT:    vmv.v.i v20, -1
; RV32-NEXT:    lui a0, %hi(.LCPI12_1)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI12_1)
; RV32-NEXT:    vle16.v v17, (a0)
; RV32-NEXT:    li a0, 113
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    vrgatherei16.vv v12, v20, v16
; RV32-NEXT:    vrgatherei16.vv v12, v8, v17, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_shuffle_xv_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 113
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    lui a0, 98305
; RV64-NEXT:    slli a0, a0, 6
; RV64-NEXT:    vmv.v.x v16, a0
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT:    vmv.v.i v12, -1
; RV64-NEXT:    vrgatherei16.vv v12, v8, v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %s = shufflevector <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x, <8 x i32> <i32 8, i32 3, i32 6, i32 5, i32 8, i32 12, i32 14, i32 3>
  ret <8 x i64> %s
}

define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x i64> %x) {
; RV32-LABEL: vrgather_shuffle_vx_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI13_0)
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV32-NEXT:    vle16.v v16, (a0)
; RV32-NEXT:    vrgatherei16.vv v12, v8, v16
; RV32-NEXT:    lui a0, %hi(.LCPI13_1)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI13_1)
; RV32-NEXT:    vle16.v v8, (a0)
; RV32-NEXT:    li a0, 140
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    vmv.v.i v16, 5
; RV32-NEXT:    vrgatherei16.vv v12, v16, v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_shuffle_vx_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, %hi(.LCPI13_0)
; RV64-NEXT:    addi a0, a0, %lo(.LCPI13_0)
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT:    vle16.v v16, (a0)
; RV64-NEXT:    li a0, 115
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    vmv.v.i v12, 5
; RV64-NEXT:    vrgatherei16.vv v12, v8, v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %s = shufflevector <8 x i64> %x, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i32> <i32 0, i32 3, i32 10, i32 9, i32 4, i32 1, i32 7, i32 14>
  ret <8 x i64> %s
}

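; Extracting a contiguous slice that starts mid-vector is a single
; vslidedown.vi on the source register.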
define <4 x i16> @shuffle_v8i16_to_vslidedown_1(<8 x i16> %x) {
; CHECK-LABEL: shuffle_v8i16_to_vslidedown_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i16> %s
}

define <4 x i16> @shuffle_v8i16_to_vslidedown_3(<8 x i16> %x) {
; CHECK-LABEL: shuffle_v8i16_to_vslidedown_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
  ret <4 x i16> %s
}

define <2 x i32> @shuffle_v4i32_to_vslidedown(<4 x i32> %x) {
; CHECK-LABEL: shuffle_v4i32_to_vslidedown:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <4 x i32> %x, <4 x i32> poison, <2 x i32> <i32 1, i32 2>
  ret <2 x i32> %s
}

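; Interleaving splats of %x[0] and %x[1] uses the widening zip idiom:
; vwaddu.vv places the even lanes and vwmaccu.vx with -1 places the odd lanes.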
define <4 x i8> @interleave_shuffles(<4 x i8> %x) {
; CHECK-LABEL: interleave_shuffles:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vrgather.vi v9, v8, 0
; CHECK-NEXT:    vrgather.vi v10, v8, 1
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vwaddu.vv v8, v9, v10
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v8, a0, v10
; CHECK-NEXT:    ret
  %y = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %z = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %w = shufflevector <4 x i8> %y, <4 x i8> %z, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x i8> %w
}

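; Splatting one element is vrgather.vi; the _ins_ variants below disturb a
; single lane, which forces a real index vector (a splat patched with vmv.s.x
; or vslideup).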
define <8 x i8> @splat_ve4(<8 x i8> %v) {
; CHECK-LABEL: splat_ve4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vrgather.vi v9, v8, 4
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve4_ins_i0ve2(<8 x i8> %v) {
; CHECK-LABEL: splat_ve4_ins_i0ve2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 4
; CHECK-NEXT:    li a0, 2
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 2, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve4_ins_i1ve3(<8 x i8> %v) {
; CHECK-LABEL: splat_ve4_ins_i1ve3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 3
; CHECK-NEXT:    vmv.v.i v10, 4
; CHECK-NEXT:    vsetivli zero, 2, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v10, v9, 1
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 4, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    li a0, 66
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vrgather.vi v10, v8, 2
; CHECK-NEXT:    vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 2, i32 2, i32 2, i32 2, i32 8, i32 2>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i0ve4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v11, 2
; CHECK-NEXT:    li a0, 4
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.s.x v11, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
; CHECK-NEXT:    li a0, 66
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vrgather.vv v10, v8, v11
; CHECK-NEXT:    vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 4, i32 8, i32 2, i32 2, i32 2, i32 2, i32 8, i32 2>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i0we4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vrgather.vi v10, v8, 2
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 4
; CHECK-NEXT:    li a0, 67
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 12, i32 8, i32 2, i32 2, i32 2, i32 2, i32 8, i32 2>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i2ve4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 8256
; CHECK-NEXT:    addi a0, a0, 514
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.v.x v11, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    li a0, 66
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vrgather.vv v10, v8, v11
; CHECK-NEXT:    vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 2, i32 2, i32 2, i32 8, i32 2>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i2we4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 4
; CHECK-NEXT:    vmv.v.i v11, 0
; CHECK-NEXT:    vsetivli zero, 3, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v11, v10, 2
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    li a0, 70
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vrgather.vi v10, v8, 2
; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 12, i32 2, i32 2, i32 2, i32 8, i32 2>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i2ve4_i5we6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 6
; CHECK-NEXT:    vmv.v.i v11, 0
; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v11, v10, 5
; CHECK-NEXT:    lui a0, 8256
; CHECK-NEXT:    addi a0, a0, 2
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.v.x v12, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    li a0, 98
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 2, i32 2, i32 14, i32 8, i32 2>
  ret <8 x i8> %shuff
}

define <8 x i8> @widen_splat_ve3(<4 x i8> %v) {
; CHECK-LABEL: widen_splat_ve3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vrgather.vi v9, v8, 3
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %shuf = shufflevector <4 x i8> %v, <4 x i8> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i8> %shuf
}

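; Masks that shift one source by a constant amount (undef lanes included)
; lower to a single slide.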
define <4 x i16> @slidedown_v4i16(<4 x i16> %x) {
; CHECK-LABEL: slidedown_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
  ret <4 x i16> %s
}

define <8 x i32> @slidedown_v8i32(<8 x i32> %x) {
; CHECK-LABEL: slidedown_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 3, i32 undef, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i32> %s
}

define <4 x i16> @slideup_v4i16(<4 x i16> %x) {
; CHECK-LABEL: slideup_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v9, v8, 1
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
  ret <4 x i16> %s
}

define <8 x i32> @slideup_v8i32(<8 x i32> %x) {
; CHECK-LABEL: slideup_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslideup.vi v10, v8, 3
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 4>
  ret <8 x i32> %s
}

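; Element rotations (splices) lower to a vslidedown/vslideup pair.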
define <8 x i16> @splice_unary(<8 x i16> %x) {
; CHECK-LABEL: splice_unary:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vslideup.vi v9, v8, 6
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i16> %x, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
  ret <8 x i16> %s
}

define <8 x i32> @splice_unary2(<8 x i32> %x) {
; CHECK-LABEL: splice_unary2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 5
; CHECK-NEXT:    vslideup.vi v10, v8, 3
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 undef, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
  ret <8 x i32> %s
}

define <8 x i16> @splice_binary(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: splice_binary:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vslideup.vi v8, v9, 6
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 9>
  ret <8 x i16> %s
}

define <8 x i32> @splice_binary2(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: splice_binary2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    vslideup.vi v8, v10, 3
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
  ret <8 x i32> %s
}

define <4 x i16> @shuffle_shuffle_vslidedown(<16 x i16> %0) {
; CHECK-LABEL: shuffle_shuffle_vslidedown:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    ret
entry:
  %1 = shufflevector <16 x i16> %0, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = shufflevector <16 x i16> %0, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %3 = shufflevector <8 x i16> %1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %4 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %5 = shufflevector <4 x i16> %3, <4 x i16> %4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i16> %5
}

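; Concatenating the low halves of %v and %w is a vslideup.vi of %w by 4; an
; undef inside the run is fine, but an undef at the start of the second run
; defeats the match and falls back to gathers.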
define <8 x i8> @concat_4xi8_start(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: concat_4xi8_start:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 4
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i8> %res
}

define <8 x i8> @concat_4xi8_start_undef(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: concat_4xi8_start_undef:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 4
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 undef, i32 10, i32 11>
  ret <8 x i8> %res
}

define <8 x i8> @concat_4xi8_start_undef_at_start(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: concat_4xi8_start_undef_at_start:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vid.v v11
; CHECK-NEXT:    vrgather.vv v10, v8, v11
; CHECK-NEXT:    li a0, 224
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vadd.vi v8, v11, -4
; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 9, i32 10, i32 11>
  ret <8 x i8> %res
}

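; Merging a contiguous run of one source into the other uses tail-undisturbed
; vmv.v.v or vslideup; non-contiguous runs fall back to masked gathers.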
define <8 x i8> @merge_start_into_end_non_contiguous(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_start_into_end_non_contiguous:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vid.v v11
; CHECK-NEXT:    vrgather.vv v10, v8, v11
; CHECK-NEXT:    li a0, 144
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vadd.vi v8, v11, -4
; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 11>
  ret <8 x i8> %res
}

define <8 x i8> @merge_end_into_end(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_end_into_end:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.v.v v9, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %res
}

define <8 x i8> @merge_start_into_middle(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_start_into_middle:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 5, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 1
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 5, i32 6, i32 7>
  ret <8 x i8> %res
}

define <8 x i8> @merge_start_into_start(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_start_into_start:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %res
}

define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_slidedown:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vid.v v11
; CHECK-NEXT:    vadd.vi v12, v11, 1
; CHECK-NEXT:    li a0, 195
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 3, i32 4, i32 5, i32 6, i32 14, i32 15>
  ret <8 x i8> %res
}

; This should slide %v down by 2 and %w up by 1 before merging them.
define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_non_contiguous_slideup_slidedown:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vid.v v11
; CHECK-NEXT:    vadd.vi v12, v11, 2
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    li a0, 234
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vadd.vi v8, v11, -1
; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 10, i32 6, i32 12, i32 13, i32 14>
  ret <8 x i8> %res
}

; This shouldn't generate a vmerge because the elements of %w are not consecutive.
define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: unmergable:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vadd.vi v11, v10, 2
; CHECK-NEXT:    lui a0, %hi(.LCPI46_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI46_0)
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    li a0, 234
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vrgather.vv v10, v8, v11
; CHECK-NEXT:    vrgather.vv v10, v9, v12, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 9, i32 4, i32 11, i32 6, i32 13, i32 8, i32 15>
  ret <8 x i8> %res
}

; Make sure we use a vmv.v.i to load the mask constant (-13 = 0xf3 fits simm5).
define <8 x i32> @shuffle_v8i32_2(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: shuffle_v8i32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v0, -13
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %s
}