1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=riscv32 -mattr=+v \
3 ; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \
4 ; RUN: | FileCheck %s --check-prefix=CHECK
5 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v \
6 ; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \
7 ; RUN: | FileCheck %s --check-prefix=CHECK
; Stride-3 deinterleave, phase 0: selects lanes {0,3,6,9,12,15} of a
; <16 x i8> load (trailing lanes undef) and stores the <8 x i8> result.
; Codegen uses a constant-pool index vector + vmerge/vrgather.
9 define void @deinterleave3_0_i8(ptr %in, ptr %out) {
10 ; CHECK-LABEL: deinterleave3_0_i8:
11 ; CHECK: # %bb.0: # %entry
12 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
13 ; CHECK-NEXT: vle8.v v8, (a0)
14 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
15 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI0_0)
16 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
17 ; CHECK-NEXT: vle8.v v9, (a0)
18 ; CHECK-NEXT: li a0, 73
19 ; CHECK-NEXT: vmv.s.x v0, a0
20 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
21 ; CHECK-NEXT: vslidedown.vi v10, v8, 8
22 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
23 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
24 ; CHECK-NEXT: vrgather.vv v10, v8, v9
25 ; CHECK-NEXT: vse8.v v10, (a1)
28 %0 = load <16 x i8>, ptr %in, align 1
29 %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 undef, i32 undef>
30 store <8 x i8> %shuffle.i5, ptr %out, align 1
; Stride-3 deinterleave, odd phase: selects lanes {1,4,7,10,13} of a
; <16 x i8> load (remaining lanes undef) and stores the <8 x i8> result.
34 define void @deinterleave3_8_i8(ptr %in, ptr %out) {
35 ; CHECK-LABEL: deinterleave3_8_i8:
36 ; CHECK: # %bb.0: # %entry
37 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
38 ; CHECK-NEXT: vle8.v v8, (a0)
39 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0)
40 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI1_0)
41 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
42 ; CHECK-NEXT: vle8.v v9, (a0)
43 ; CHECK-NEXT: li a0, 146
44 ; CHECK-NEXT: vmv.s.x v0, a0
45 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
46 ; CHECK-NEXT: vslidedown.vi v10, v8, 8
47 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
48 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
49 ; CHECK-NEXT: vrgather.vv v10, v8, v9
50 ; CHECK-NEXT: vse8.v v10, (a1)
53 %0 = load <16 x i8>, ptr %in, align 1
54 %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 undef, i32 undef, i32 undef>
55 store <8 x i8> %shuffle.i5, ptr %out, align 1
; Stride-4 deinterleave, phase 0: selects lanes {0,4,8,12} of a <16 x i8>
; load. Power-of-two stride lowers to a pair of vnsrl narrowing shifts.
59 define void @deinterleave4_0_i8(ptr %in, ptr %out) {
60 ; CHECK-LABEL: deinterleave4_0_i8:
61 ; CHECK: # %bb.0: # %entry
62 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
63 ; CHECK-NEXT: vle8.v v8, (a0)
64 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
65 ; CHECK-NEXT: vnsrl.wi v8, v8, 0
66 ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
67 ; CHECK-NEXT: vnsrl.wi v8, v8, 0
68 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
69 ; CHECK-NEXT: vse8.v v8, (a1)
72 %0 = load <16 x i8>, ptr %in, align 1
73 %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef>
74 store <8 x i8> %shuffle.i5, ptr %out, align 1
; Stride-4 deinterleave, odd phase: selects lanes {1,5,9,13} of a <16 x i8>
; load. Same vnsrl lowering as phase 0, with an initial shift of 8 bits.
78 define void @deinterleave4_8_i8(ptr %in, ptr %out) {
79 ; CHECK-LABEL: deinterleave4_8_i8:
80 ; CHECK: # %bb.0: # %entry
81 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
82 ; CHECK-NEXT: vle8.v v8, (a0)
83 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
84 ; CHECK-NEXT: vnsrl.wi v8, v8, 8
85 ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
86 ; CHECK-NEXT: vnsrl.wi v8, v8, 0
87 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
88 ; CHECK-NEXT: vse8.v v8, (a1)
91 %0 = load <16 x i8>, ptr %in, align 1
92 %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
93 store <8 x i8> %shuffle.i5, ptr %out, align 1
; Stride-5 deinterleave, phase 0: selects lanes {0,5,10,15} of a <16 x i8>
; load. The gather index vector is materialized from an immediate splat
; (vmv.v.x at e32) instead of a constant-pool load.
97 define void @deinterleave5_0_i8(ptr %in, ptr %out) {
98 ; CHECK-LABEL: deinterleave5_0_i8:
99 ; CHECK: # %bb.0: # %entry
100 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
101 ; CHECK-NEXT: vle8.v v8, (a0)
102 ; CHECK-NEXT: li a0, 33
103 ; CHECK-NEXT: vmv.s.x v0, a0
104 ; CHECK-NEXT: lui a0, 28704
105 ; CHECK-NEXT: addi a0, a0, 1280
106 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
107 ; CHECK-NEXT: vslidedown.vi v9, v8, 8
108 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
109 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
110 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
111 ; CHECK-NEXT: vmv.v.x v9, a0
112 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
113 ; CHECK-NEXT: vrgather.vv v10, v8, v9
114 ; CHECK-NEXT: vse8.v v10, (a1)
117 %0 = load <16 x i8>, ptr %in, align 1
118 %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 5, i32 10, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
119 store <8 x i8> %shuffle.i5, ptr %out, align 1
; Stride-5 deinterleave, odd phase: selects lanes {1,6,11} of a <16 x i8>
; load. Lowers to vcompress on the low half plus a masked vrgather.vi to
; pull the lane from the slid-down high half.
123 define void @deinterleave5_8_i8(ptr %in, ptr %out) {
124 ; CHECK-LABEL: deinterleave5_8_i8:
125 ; CHECK: # %bb.0: # %entry
126 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
127 ; CHECK-NEXT: vle8.v v8, (a0)
128 ; CHECK-NEXT: li a0, 66
129 ; CHECK-NEXT: vmv.v.i v0, 4
130 ; CHECK-NEXT: vmv.s.x v9, a0
131 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
132 ; CHECK-NEXT: vcompress.vm v10, v8, v9
133 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
134 ; CHECK-NEXT: vslidedown.vi v8, v8, 8
135 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
136 ; CHECK-NEXT: vrgather.vi v10, v8, 3, v0.t
137 ; CHECK-NEXT: vse8.v v10, (a1)
140 %0 = load <16 x i8>, ptr %in, align 1
141 %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 6, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
142 store <8 x i8> %shuffle.i5, ptr %out, align 1
; Stride-6 deinterleave, phase 0: selects lanes {0,6,12} of a <16 x i8>
; load via vcompress (low half) + masked vrgather.vi (high half).
146 define void @deinterleave6_0_i8(ptr %in, ptr %out) {
147 ; CHECK-LABEL: deinterleave6_0_i8:
148 ; CHECK: # %bb.0: # %entry
149 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
150 ; CHECK-NEXT: vle8.v v8, (a0)
151 ; CHECK-NEXT: li a0, 65
152 ; CHECK-NEXT: vmv.v.i v0, 4
153 ; CHECK-NEXT: vmv.s.x v9, a0
154 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
155 ; CHECK-NEXT: vcompress.vm v10, v8, v9
156 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
157 ; CHECK-NEXT: vslidedown.vi v8, v8, 8
158 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
159 ; CHECK-NEXT: vrgather.vi v10, v8, 4, v0.t
160 ; CHECK-NEXT: vse8.v v10, (a1)
163 %0 = load <16 x i8>, ptr %in, align 1
164 %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 6, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
165 store <8 x i8> %shuffle.i5, ptr %out, align 1
; Stride-6 deinterleave, odd phase: selects lanes {1,7,13} of a <16 x i8>
; load via vcompress (low half) + masked vrgather.vi (high half).
169 define void @deinterleave6_8_i8(ptr %in, ptr %out) {
170 ; CHECK-LABEL: deinterleave6_8_i8:
171 ; CHECK: # %bb.0: # %entry
172 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
173 ; CHECK-NEXT: vle8.v v8, (a0)
174 ; CHECK-NEXT: li a0, 130
175 ; CHECK-NEXT: vmv.v.i v0, 4
176 ; CHECK-NEXT: vmv.s.x v9, a0
177 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
178 ; CHECK-NEXT: vcompress.vm v10, v8, v9
179 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
180 ; CHECK-NEXT: vslidedown.vi v8, v8, 8
181 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
182 ; CHECK-NEXT: vrgather.vi v10, v8, 5, v0.t
183 ; CHECK-NEXT: vse8.v v10, (a1)
186 %0 = load <16 x i8>, ptr %in, align 1
187 %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 7, i32 13, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
188 store <8 x i8> %shuffle.i5, ptr %out, align 1
; Stride-7 deinterleave, phase 0: selects lanes {0,7,14} of a <16 x i8>
; load via vcompress (low half) + masked vrgather.vi (high half).
192 define void @deinterleave7_0_i8(ptr %in, ptr %out) {
193 ; CHECK-LABEL: deinterleave7_0_i8:
194 ; CHECK: # %bb.0: # %entry
195 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
196 ; CHECK-NEXT: vle8.v v8, (a0)
197 ; CHECK-NEXT: li a0, 129
198 ; CHECK-NEXT: vmv.v.i v0, 4
199 ; CHECK-NEXT: vmv.s.x v9, a0
200 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
201 ; CHECK-NEXT: vcompress.vm v10, v8, v9
202 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
203 ; CHECK-NEXT: vslidedown.vi v8, v8, 8
204 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
205 ; CHECK-NEXT: vrgather.vi v10, v8, 6, v0.t
206 ; CHECK-NEXT: vse8.v v10, (a1)
209 %0 = load <16 x i8>, ptr %in, align 1
210 %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 7, i32 14, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
211 store <8 x i8> %shuffle.i5, ptr %out, align 1
; Near-stride-7 selection of lanes {1,8,14} from a <16 x i8> load (note the
; mask is not a uniform stride: 1, 8, 14). Indices for the high half are
; built arithmetically with vid.v + vmadd.vx, then applied via masked
; vrgather.vv on top of a vrgather.vi for the low-half element.
215 define void @deinterleave7_8_i8(ptr %in, ptr %out) {
216 ; CHECK-LABEL: deinterleave7_8_i8:
217 ; CHECK: # %bb.0: # %entry
218 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
219 ; CHECK-NEXT: vle8.v v8, (a0)
220 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
221 ; CHECK-NEXT: vmv.v.i v9, -6
222 ; CHECK-NEXT: vid.v v10
223 ; CHECK-NEXT: li a0, 6
224 ; CHECK-NEXT: vmv.v.i v0, 6
225 ; CHECK-NEXT: vmadd.vx v10, a0, v9
226 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
227 ; CHECK-NEXT: vslidedown.vi v9, v8, 8
228 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
229 ; CHECK-NEXT: vrgather.vi v11, v8, 1
230 ; CHECK-NEXT: vrgather.vv v11, v9, v10, v0.t
231 ; CHECK-NEXT: vse8.v v11, (a1)
234 %0 = load <16 x i8>, ptr %in, align 1
235 %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 8, i32 14, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
236 store <8 x i8> %shuffle.i5, ptr %out, align 1
; Stride-8 deinterleave, phase 0: selects lanes {0,8} of a <16 x i8> load.
; Power-of-two stride lowers to a chain of three vnsrl narrowing shifts.
240 define void @deinterleave8_0_i8(ptr %in, ptr %out) {
241 ; CHECK-LABEL: deinterleave8_0_i8:
242 ; CHECK: # %bb.0: # %entry
243 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
244 ; CHECK-NEXT: vle8.v v8, (a0)
245 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
246 ; CHECK-NEXT: vnsrl.wi v8, v8, 0
247 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
248 ; CHECK-NEXT: vnsrl.wi v8, v8, 0
249 ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
250 ; CHECK-NEXT: vnsrl.wi v8, v8, 0
251 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
252 ; CHECK-NEXT: vse8.v v8, (a1)
255 %0 = load <16 x i8>, ptr %in, align 1
256 %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
257 store <8 x i8> %shuffle.i5, ptr %out, align 1
; Stride-8 deinterleave, odd phase: selects lanes {1,9} of a <16 x i8> load.
; Same vnsrl chain as phase 0, with the first shift extracting bits 8+.
261 define void @deinterleave8_8_i8(ptr %in, ptr %out) {
262 ; CHECK-LABEL: deinterleave8_8_i8:
263 ; CHECK: # %bb.0: # %entry
264 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
265 ; CHECK-NEXT: vle8.v v8, (a0)
266 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
267 ; CHECK-NEXT: vnsrl.wi v8, v8, 8
268 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
269 ; CHECK-NEXT: vnsrl.wi v8, v8, 0
270 ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
271 ; CHECK-NEXT: vnsrl.wi v8, v8, 0
272 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
273 ; CHECK-NEXT: vse8.v v8, (a1)
276 %0 = load <16 x i8>, ptr %in, align 1
277 %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
278 store <8 x i8> %shuffle.i5, ptr %out, align 1
282 ; Exercise the high lmul case
; Same lane selection {0,7,14} as deinterleave7_0_i8, but on <16 x i64> so
; the source occupies LMUL=8 register groups (vcompress/vrgather at m4/m8).
283 define void @deinterleave7_0_i64(ptr %in, ptr %out) {
284 ; CHECK-LABEL: deinterleave7_0_i64:
285 ; CHECK: # %bb.0: # %entry
286 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
287 ; CHECK-NEXT: vle64.v v8, (a0)
288 ; CHECK-NEXT: li a0, 129
289 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
290 ; CHECK-NEXT: vmv.v.i v0, 4
291 ; CHECK-NEXT: vmv.s.x v16, a0
292 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
293 ; CHECK-NEXT: vcompress.vm v20, v8, v16
294 ; CHECK-NEXT: vsetivli zero, 8, e64, m8, ta, ma
295 ; CHECK-NEXT: vslidedown.vi v8, v8, 8
296 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu
297 ; CHECK-NEXT: vrgather.vi v20, v8, 6, v0.t
298 ; CHECK-NEXT: vse64.v v20, (a1)
301 %0 = load <16 x i64>, ptr %in
302 %shuffle.i5 = shufflevector <16 x i64> %0, <16 x i64> poison, <8 x i32> <i32 0, i32 7, i32 14, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
303 store <8 x i64> %shuffle.i5, ptr %out
307 ; Store back only the active subvector
; Like deinterleave4_0_i8 (lanes {0,4,8,12} via a vnsrl pair) but the result
; type is <4 x i8>, so only the 4 active elements are stored.
308 define void @deinterleave4_0_i8_subvec(ptr %in, ptr %out) {
309 ; CHECK-LABEL: deinterleave4_0_i8_subvec:
310 ; CHECK: # %bb.0: # %entry
311 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
312 ; CHECK-NEXT: vle8.v v8, (a0)
313 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
314 ; CHECK-NEXT: vnsrl.wi v8, v8, 0
315 ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
316 ; CHECK-NEXT: vnsrl.wi v8, v8, 0
317 ; CHECK-NEXT: vse8.v v8, (a1)
320 %0 = load <16 x i8>, ptr %in, align 1
321 %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
322 store <4 x i8> %shuffle.i5, ptr %out, align 1
326 ; Store back only the active subvector
; Lanes {0,7,14} of a <16 x i32> load; the result is <3 x i32>, so the store
; runs under VL=3 (vsetivli zero, 3) to write only the active elements.
327 define void @deinterleave7_0_i32_subvec(ptr %in, ptr %out) {
328 ; CHECK-LABEL: deinterleave7_0_i32_subvec:
329 ; CHECK: # %bb.0: # %entry
330 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
331 ; CHECK-NEXT: vle32.v v8, (a0)
332 ; CHECK-NEXT: li a0, 129
333 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
334 ; CHECK-NEXT: vmv.v.i v0, 4
335 ; CHECK-NEXT: vmv.s.x v12, a0
336 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
337 ; CHECK-NEXT: vcompress.vm v14, v8, v12
338 ; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, ma
339 ; CHECK-NEXT: vslidedown.vi v8, v8, 8
340 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
341 ; CHECK-NEXT: vrgather.vi v14, v8, 6, v0.t
342 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
343 ; CHECK-NEXT: vse32.v v14, (a1)
346 %0 = load <16 x i32>, ptr %in
347 %shuffle.i5 = shufflevector <16 x i32> %0, <16 x i32> poison, <3 x i32> <i32 0, i32 7, i32 14>
348 store <3 x i32> %shuffle.i5, ptr %out
352 ; Store back only the active subvector
; Lanes {0,8} of a <16 x i8> load stored as <2 x i8>. With only two scalar
; elements, codegen extracts them (vmv.x.s) and rebuilds via vslide1down
; instead of using a shuffle.
353 define void @deinterleave8_0_i8_subvec(ptr %in, ptr %out) {
354 ; CHECK-LABEL: deinterleave8_0_i8_subvec:
355 ; CHECK: # %bb.0: # %entry
356 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
357 ; CHECK-NEXT: vle8.v v8, (a0)
358 ; CHECK-NEXT: vslidedown.vi v9, v8, 8
359 ; CHECK-NEXT: vmv.x.s a0, v8
360 ; CHECK-NEXT: vmv.x.s a2, v9
361 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
362 ; CHECK-NEXT: vmv.v.x v8, a0
363 ; CHECK-NEXT: vslide1down.vx v8, v8, a2
364 ; CHECK-NEXT: vse8.v v8, (a1)
367 %0 = load <16 x i8>, ptr %in, align 1
368 %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <2 x i32> <i32 0, i32 8>
369 store <2 x i8> %shuffle.i5, ptr %out, align 1