; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck -check-prefixes=CHECK,RV32 %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck -check-prefixes=CHECK,RV64 %s
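
; Narrowing shuffle of a <16 x i1> mask down to <8 x i1>; the selected mask bits are extracted with shift pairs and reassembled with vslide1down.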
define <8 x i1> @v8i1_v16i1(<16 x i1>) {
; RV32-LABEL: v8i1_v16i1:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v0
; RV32-NEXT:    slli a1, a0, 18
; RV32-NEXT:    srli a2, a0, 31
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT:    vmv.v.x v8, a2
; RV32-NEXT:    slli a2, a0, 27
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    slli a1, a0, 26
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    slli a1, a0, 28
; RV32-NEXT:    srli a2, a2, 31
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    slli a2, a0, 19
; RV32-NEXT:    srli a2, a2, 31
; RV32-NEXT:    vslide1down.vx v9, v9, a2
; RV32-NEXT:    slli a2, a0, 24
; RV32-NEXT:    slli a0, a0, 29
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    srli a2, a2, 31
; RV32-NEXT:    srli a0, a0, 31
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v9, v9, a2
; RV32-NEXT:    vmv.v.i v0, 15
; RV32-NEXT:    vslide1down.vx v9, v9, a0
; RV32-NEXT:    vslidedown.vi v8, v9, 4, v0.t
; RV32-NEXT:    vand.vi v8, v8, 1
; RV32-NEXT:    vmsne.vi v0, v8, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: v8i1_v16i1:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v0
; RV64-NEXT:    slli a1, a0, 50
; RV64-NEXT:    srli a2, a0, 63
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; RV64-NEXT:    vmv.v.x v8, a2
; RV64-NEXT:    slli a2, a0, 59
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    slli a1, a0, 58
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    vmv.v.x v9, a1
; RV64-NEXT:    slli a1, a0, 60
; RV64-NEXT:    srli a2, a2, 63
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    slli a2, a0, 51
; RV64-NEXT:    srli a2, a2, 63
; RV64-NEXT:    vslide1down.vx v9, v9, a2
; RV64-NEXT:    slli a2, a0, 56
; RV64-NEXT:    slli a0, a0, 61
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    srli a2, a2, 63
; RV64-NEXT:    srli a0, a0, 63
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v9, v9, a2
; RV64-NEXT:    vmv.v.i v0, 15
; RV64-NEXT:    vslide1down.vx v9, v9, a0
; RV64-NEXT:    vslidedown.vi v8, v9, 4, v0.t
; RV64-NEXT:    vand.vi v8, v8, 1
; RV64-NEXT:    vmsne.vi v0, v8, 0
; RV64-NEXT:    ret
  %2 = shufflevector <16 x i1> %0, <16 x i1> poison, <8 x i32> <i32 5, i32 12, i32 7, i32 2, i32 15, i32 13, i32 4, i32 3>
  ret <8 x i1> %2
}
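
; Narrowing shuffle of an <8 x i32> source down to <4 x i32>, mixing lanes from both halves.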
define <4 x i32> @v4i32_v8i32(<8 x i32>) {
; CHECK-LABEL: v4i32_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vmv.v.i v0, 5
; CHECK-NEXT:    vsrl.vi v10, v10, 1
; CHECK-NEXT:    vrsub.vi v11, v10, 3
; CHECK-NEXT:    vrgather.vv v10, v8, v11
; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 4
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vslidedown.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %2 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 5, i32 3, i32 7, i32 2>
  ret <4 x i32> %2
}
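
; Narrowing shuffle of a <16 x i32> source: a vnsrl picks the odd elements of the low half and a masked vrgatherei16 pulls in the lanes from the upper half.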
define <4 x i32> @v4i32_v16i32(<16 x i32>) {
; RV32-LABEL: v4i32_v16i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
; RV32-NEXT:    vmv.v.i v12, 1
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vmv.v.i v14, 6
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vmv.v.i v0, 10
; RV32-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
; RV32-NEXT:    vslideup.vi v14, v12, 1
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wx v12, v8, a0
; RV32-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 8
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT:    vrgatherei16.vv v12, v8, v14, v0.t
; RV32-NEXT:    vmv1r.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: v4i32_v16i32:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.i v0, 10
; RV64-NEXT:    vnsrl.wx v12, v8, a0
; RV64-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 8
; RV64-NEXT:    li a0, 3
; RV64-NEXT:    slli a0, a0, 33
; RV64-NEXT:    addi a0, a0, 1
; RV64-NEXT:    slli a0, a0, 16
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.v.x v10, a0
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; RV64-NEXT:    vrgatherei16.vv v12, v8, v10, v0.t
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %2 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 1, i32 9, i32 5, i32 14>
  ret <4 x i32> %2
}
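
; With a <32 x i32> source the shuffle goes through the stack: the vector is spilled with vse32 and the needed lanes are picked up with scalar loads and element extracts.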
define <4 x i32> @v4i32_v32i32(<32 x i32>) {
; RV32-LABEL: v4i32_v32i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -256
; RV32-NEXT:    .cfi_def_cfa_offset 256
; RV32-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    addi s0, sp, 256
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v16, v8, 1
; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; RV32-NEXT:    vse32.v v8, (a1)
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 4
; RV32-NEXT:    lw a0, 36(sp)
; RV32-NEXT:    vmv.x.s a1, v16
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    lw a1, 120(sp)
; RV32-NEXT:    vslide1down.vx v9, v9, a0
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslide1down.vx v8, v9, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    addi sp, s0, -256
; RV32-NEXT:    .cfi_def_cfa sp, 256
; RV32-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    .cfi_restore s0
; RV32-NEXT:    addi sp, sp, 256
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: v4i32_v32i32:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -256
; RV64-NEXT:    .cfi_def_cfa_offset 256
; RV64-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    mv a1, sp
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v16, v8, 1
; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; RV64-NEXT:    vse32.v v8, (a1)
; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 4
; RV64-NEXT:    lw a0, 36(sp)
; RV64-NEXT:    vmv.x.s a1, v16
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.x v9, a1
; RV64-NEXT:    lw a1, 120(sp)
; RV64-NEXT:    vslide1down.vx v9, v9, a0
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslide1down.vx v8, v9, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    addi sp, s0, -256
; RV64-NEXT:    .cfi_def_cfa sp, 256
; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    .cfi_restore s0
; RV64-NEXT:    addi sp, sp, 256
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %2 = shufflevector <32 x i32> %0, <32 x i32> poison, <4 x i32> <i32 1, i32 9, i32 4, i32 30>
  ret <4 x i32> %2
}
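
; Widening shuffle of an <8 x i1> mask to <16 x i1>; the mask is expanded to bytes, permuted with a constant-pool index vector, and compared back down to a mask.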
define <16 x i1> @v16i1_v8i1(<8 x i1>) {
; CHECK-LABEL: v16i1_v8i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI4_0)
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vrgather.vv v10, v9, v8
; CHECK-NEXT:    vmsne.vi v0, v10, 0
; CHECK-NEXT:    ret
  %2 = shufflevector <8 x i1> %0, <8 x i1> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 5, i32 1, i32 2, i32 0, i32 6, i32 2, i32 3, i32 0, i32 7, i32 1, i32 2, i32 0, i32 4>
  ret <16 x i1> %2
}
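
; Widening shuffle of a <4 x i32> source to <8 x i32> using a constant-pool index vector and vrgatherei16.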
define <8 x i32> @v8i32_v4i32(<4 x i32>) {
; CHECK-LABEL: v8i32_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_0)
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %2 = shufflevector <4 x i32> %0, <4 x i32> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 1, i32 2, i32 0, i32 3>
  ret <8 x i32> %2
}
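
; Widening shuffle to <16 x i32>; the index vector is built with a chain of masked vmerge.vim ops rather than a constant-pool load.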
define <16 x i32> @v16i32_v4i32(<4 x i32>) {
; CHECK-LABEL: v16i32_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 2
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 3
; CHECK-NEXT:    addi a1, a0, 265
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    lui a1, 4
; CHECK-NEXT:    addi a1, a1, 548
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmerge.vim v9, v9, 2, v0
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    addi a0, a0, -1856
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmerge.vim v9, v9, 0, v0
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsext.vf2 v16, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %2 = shufflevector <4 x i32> %0, <4 x i32> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3>
  ret <16 x i32> %2
}
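
; Same pattern widened to <32 x i32>: the index vector is again assembled with masked vmerge.vim before a vrgatherei16.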
define <32 x i32> @v32i32_v4i32(<4 x i32>) {
; CHECK-LABEL: v32i32_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    lui a1, 135432
; CHECK-NEXT:    addi a1, a1, 1161
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    lui a1, 270865
; CHECK-NEXT:    addi a1, a1, 548
; CHECK-NEXT:    vmv.s.x v9, a1
; CHECK-NEXT:    lui a1, 100550
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 3
; CHECK-NEXT:    addi a0, a1, 64
; CHECK-NEXT:    vmerge.vim v18, v10, 2, v0
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vmv.s.x v16, a0
; CHECK-NEXT:    vmv1r.v v0, v9
; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT:    vmerge.vim v18, v18, 0, v0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vmerge.vim v16, v18, 1, v0
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vsext.vf2 v24, v16
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v16, v8, v24
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %2 = shufflevector <4 x i32> %0, <4 x i32> poison, <32 x i32> <i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3>
  ret <32 x i32> %2
}
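
; Deinterleave of the odd bytes of a <64 x i8> source; this matches a single vnsrl.wi by 8.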
define <32 x i8> @vnsrl_v32i8_v64i8(<64 x i8> %in) {
; CHECK-LABEL: vnsrl_v32i8_v64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vnsrl.wi v12, v8, 8
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %res = shufflevector <64 x i8> %in, <64 x i8> poison, <32 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63>
  ret <32 x i8> %res
}