1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck -check-prefix=VLA %s
3 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck -check-prefix=VLA %s
5 ; RUN: llc < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefix=VLS %s
6 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefix=VLS %s
; Concatenation of two <4 x i32> values into one <8 x i32>: the identity mask
; 0..7 takes all four lanes of %a followed by all four lanes of %b.
; The VLA checks shown here expect a single vslideup.vi by 4 under e32/m2.
8 define <8 x i32> @concat_2xv4i32(<4 x i32> %a, <4 x i32> %b) {
9 ; VLA-LABEL: concat_2xv4i32:
11 ; VLA-NEXT: vmv1r.v v10, v9
12 ; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
13 ; VLA-NEXT: vslideup.vi v8, v10, 4
16 ; VLS-LABEL: concat_2xv4i32:
19 %ab = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Two-level concatenation tree: four <2 x i32> inputs are joined pairwise into
; %ab and %cd (<4 x i32> each), which are then joined into the <8 x i32> %abcd.
; Every shuffle uses the identity mask (first operand's lanes, then the second's).
; In the check lines shown here, the VLA run slides by 2 at m1 for each pair and
; then by 4 at m2; the VLS run builds the pairs in v12/v13 and copies the
; register pair to v8 with vmv2r.v, with no m2 slideup.
23 define <8 x i32> @concat_4xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
24 ; VLA-LABEL: concat_4xv2i32:
26 ; VLA-NEXT: vsetivli zero, 4, e32, m1, ta, ma
27 ; VLA-NEXT: vslideup.vi v10, v11, 2
28 ; VLA-NEXT: vslideup.vi v8, v9, 2
29 ; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
30 ; VLA-NEXT: vslideup.vi v8, v10, 4
33 ; VLS-LABEL: concat_4xv2i32:
35 ; VLS-NEXT: vmv1r.v v13, v10
36 ; VLS-NEXT: vmv1r.v v12, v8
37 ; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma
38 ; VLS-NEXT: vslideup.vi v13, v11, 2
39 ; VLS-NEXT: vslideup.vi v12, v9, 2
40 ; VLS-NEXT: vmv2r.v v8, v12
42 %ab = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
43 %cd = shufflevector <2 x i32> %c, <2 x i32> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
44 %abcd = shufflevector <4 x i32> %ab, <4 x i32> %cd, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Three-level concatenation tree over eight <1 x i32> inputs:
;   pairs  (%ab, %cd, %ef, %gh)  -> <2 x i32>
;   quads  (%abcd, %efgh)        -> <4 x i32>
;   result (%abcdefgh)           -> <8 x i32>, which is returned.
; Every shuffle uses the identity mask, so lane order is a, b, c, d, e, f, g, h.
; The VLA checks expect one slideup per join: by 1 at mf2, by 2 at m1, and a
; final one by 4 at m2. The VLS checks assemble the two quads in v16/v17 and
; finish with vmv2r.v instead of the m2 slideup.
48 define <8 x i32> @concat_8xv1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %c, <1 x i32> %d, <1 x i32> %e, <1 x i32> %f, <1 x i32> %g, <1 x i32> %h) {
49 ; VLA-LABEL: concat_8xv1i32:
51 ; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
52 ; VLA-NEXT: vslideup.vi v14, v15, 1
53 ; VLA-NEXT: vslideup.vi v12, v13, 1
54 ; VLA-NEXT: vsetivli zero, 4, e32, m1, ta, ma
55 ; VLA-NEXT: vslideup.vi v12, v14, 2
56 ; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
57 ; VLA-NEXT: vslideup.vi v10, v11, 1
58 ; VLA-NEXT: vslideup.vi v8, v9, 1
59 ; VLA-NEXT: vsetivli zero, 4, e32, m1, ta, ma
60 ; VLA-NEXT: vslideup.vi v8, v10, 2
61 ; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
62 ; VLA-NEXT: vslideup.vi v8, v12, 4
65 ; VLS-LABEL: concat_8xv1i32:
67 ; VLS-NEXT: vmv1r.v v17, v12
68 ; VLS-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
69 ; VLS-NEXT: vslideup.vi v14, v15, 1
70 ; VLS-NEXT: vmv1r.v v16, v8
71 ; VLS-NEXT: vslideup.vi v17, v13, 1
72 ; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma
73 ; VLS-NEXT: vslideup.vi v17, v14, 2
74 ; VLS-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
75 ; VLS-NEXT: vslideup.vi v10, v11, 1
76 ; VLS-NEXT: vslideup.vi v16, v9, 1
77 ; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma
78 ; VLS-NEXT: vslideup.vi v16, v10, 2
79 ; VLS-NEXT: vmv2r.v v8, v16
81 %ab = shufflevector <1 x i32> %a, <1 x i32> %b, <2 x i32> <i32 0, i32 1>
82 %cd = shufflevector <1 x i32> %c, <1 x i32> %d, <2 x i32> <i32 0, i32 1>
83 %abcd = shufflevector <2 x i32> %ab, <2 x i32> %cd, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
84 %ef = shufflevector <1 x i32> %e, <1 x i32> %f, <2 x i32> <i32 0, i32 1>
85 %gh = shufflevector <1 x i32> %g, <1 x i32> %h, <2 x i32> <i32 0, i32 1>
86 %efgh = shufflevector <2 x i32> %ef, <2 x i32> %gh, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
87 %abcdefgh = shufflevector <4 x i32> %abcd, <4 x i32> %efgh, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
88 ret <8 x i32> %abcdefgh
; Concatenation of two <8 x i32> values into one <16 x i32>: the identity mask
; 0..15 takes all eight lanes of %a followed by all eight lanes of %b.
; The VLA checks shown here expect a single vslideup.vi by 8 under e32/m4.
91 define <16 x i32> @concat_2xv8i32(<8 x i32> %a, <8 x i32> %b) {
92 ; VLA-LABEL: concat_2xv8i32:
94 ; VLA-NEXT: vmv2r.v v12, v10
95 ; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma
96 ; VLA-NEXT: vslideup.vi v8, v12, 8
99 ; VLS-LABEL: concat_2xv8i32:
102 %v = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Two-level concatenation tree: four <4 x i32> inputs are joined pairwise into
; %ab and %cd (<8 x i32> each), which are then joined into the <16 x i32> %abcd.
; Every shuffle uses the identity mask (first operand's lanes, then the second's).
; The VLA checks shown here expect slideups by 4 at m2 for the pairs, then one
; slideup by 8 at m4 for the final join.
106 define <16 x i32> @concat_4xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
107 ; VLA-LABEL: concat_4xv4i32:
109 ; VLA-NEXT: vmv1r.v v14, v11
110 ; VLA-NEXT: vmv1r.v v12, v10
111 ; VLA-NEXT: vmv1r.v v10, v9
112 ; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
113 ; VLA-NEXT: vslideup.vi v12, v14, 4
114 ; VLA-NEXT: vslideup.vi v8, v10, 4
115 ; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma
116 ; VLA-NEXT: vslideup.vi v8, v12, 8
119 ; VLS-LABEL: concat_4xv4i32:
122 %ab = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
123 %cd = shufflevector <4 x i32> %c, <4 x i32> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
124 %abcd = shufflevector <8 x i32> %ab, <8 x i32> %cd, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Three-level concatenation tree over eight <2 x i32> inputs:
;   pairs  (%ab, %cd, %ef, %gh)  -> <4 x i32>
;   quads  (%abcd, %efgh)        -> <8 x i32>
;   result (%abcdefgh)           -> <16 x i32>, which is returned.
; Every shuffle uses the identity mask, so lane order is a, b, c, d, e, f, g, h.
; The VLA checks expect one slideup per join: by 2 at m1, by 4 at m2, and a
; final one by 8 at m4. The VLS checks only slide by 2 at m1, building the four
; pairs in v16..v19, and then copy the whole group to v8 with vmv4r.v.
128 define <16 x i32> @concat_8xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d, <2 x i32> %e, <2 x i32> %f, <2 x i32> %g, <2 x i32> %h) {
129 ; VLA-LABEL: concat_8xv2i32:
131 ; VLA-NEXT: vsetivli zero, 4, e32, m1, ta, ma
132 ; VLA-NEXT: vslideup.vi v14, v15, 2
133 ; VLA-NEXT: vslideup.vi v12, v13, 2
134 ; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
135 ; VLA-NEXT: vslideup.vi v12, v14, 4
136 ; VLA-NEXT: vsetivli zero, 4, e32, m1, ta, ma
137 ; VLA-NEXT: vslideup.vi v10, v11, 2
138 ; VLA-NEXT: vslideup.vi v8, v9, 2
139 ; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
140 ; VLA-NEXT: vslideup.vi v8, v10, 4
141 ; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma
142 ; VLA-NEXT: vslideup.vi v8, v12, 8
145 ; VLS-LABEL: concat_8xv2i32:
147 ; VLS-NEXT: vmv1r.v v19, v14
148 ; VLS-NEXT: vmv1r.v v18, v12
149 ; VLS-NEXT: vmv1r.v v17, v10
150 ; VLS-NEXT: vmv1r.v v16, v8
151 ; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma
152 ; VLS-NEXT: vslideup.vi v19, v15, 2
153 ; VLS-NEXT: vslideup.vi v18, v13, 2
154 ; VLS-NEXT: vslideup.vi v17, v11, 2
155 ; VLS-NEXT: vslideup.vi v16, v9, 2
156 ; VLS-NEXT: vmv4r.v v8, v16
158 %ab = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
159 %cd = shufflevector <2 x i32> %c, <2 x i32> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
160 %abcd = shufflevector <4 x i32> %ab, <4 x i32> %cd, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
161 %ef = shufflevector <2 x i32> %e, <2 x i32> %f, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
162 %gh = shufflevector <2 x i32> %g, <2 x i32> %h, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
163 %efgh = shufflevector <4 x i32> %ef, <4 x i32> %gh, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
164 %abcdefgh = shufflevector <8 x i32> %abcd, <8 x i32> %efgh, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
165 ret <16 x i32> %abcdefgh
; Concatenation of two <16 x i32> values into one <32 x i32>: the identity mask
; 0..31 takes all sixteen lanes of %a followed by all sixteen lanes of %b.
; The VLA checks shown here expect a single vslideup.vi by 16 under e32/m8. The
; AVL of 32 is loaded into a0 and passed via vsetvli rather than vsetivli
; (whose immediate AVL field is 5 bits wide).
168 define <32 x i32> @concat_2xv16i32(<16 x i32> %a, <16 x i32> %b) {
169 ; VLA-LABEL: concat_2xv16i32:
171 ; VLA-NEXT: vmv4r.v v16, v12
172 ; VLA-NEXT: li a0, 32
173 ; VLA-NEXT: vsetvli zero, a0, e32, m8, ta, ma
174 ; VLA-NEXT: vslideup.vi v8, v16, 16
177 ; VLS-LABEL: concat_2xv16i32:
180 %ab = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Two-level concatenation tree: four <8 x i32> inputs are joined pairwise into
; %ab and %cd (<16 x i32> each), which are then joined into the <32 x i32> %abcd.
; Every shuffle uses the identity mask (first operand's lanes, then the second's).
; The VLA checks shown here expect slideups by 8 at m4 for the pairs, then one
; slideup by 16 at m8 with the AVL of 32 supplied in a0 through vsetvli.
184 define <32 x i32> @concat_4xv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
185 ; VLA-LABEL: concat_4xv8i32:
187 ; VLA-NEXT: vmv2r.v v20, v14
188 ; VLA-NEXT: vmv2r.v v16, v12
189 ; VLA-NEXT: vmv2r.v v12, v10
190 ; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma
191 ; VLA-NEXT: vslideup.vi v16, v20, 8
192 ; VLA-NEXT: vslideup.vi v8, v12, 8
193 ; VLA-NEXT: li a0, 32
194 ; VLA-NEXT: vsetvli zero, a0, e32, m8, ta, ma
195 ; VLA-NEXT: vslideup.vi v8, v16, 16
198 ; VLS-LABEL: concat_4xv8i32:
201 %ab = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
202 %cd = shufflevector <8 x i32> %c, <8 x i32> %d, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
203 %abcd = shufflevector <16 x i32> %ab, <16 x i32> %cd, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Three-level concatenation tree over eight <4 x i32> inputs:
;   pairs  (%ab, %cd, %ef, %gh)  -> <8 x i32>
;   quads  (%abcd, %efgh)        -> <16 x i32>
;   result (%abcdefgh)           -> <32 x i32>, which is returned.
; Every shuffle uses the identity mask, so lane order is a, b, c, d, e, f, g, h.
; The VLA checks shown here expect one slideup per join: by 4 at m2, by 8 at
; m4, and a final one by 16 at m8 with the AVL of 32 supplied in a0 through
; vsetvli.
207 define <32 x i32> @concat_8xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, <4 x i32> %h) {
208 ; VLA-LABEL: concat_8xv4i32:
210 ; VLA-NEXT: vmv1r.v v18, v15
211 ; VLA-NEXT: vmv1r.v v20, v14
212 ; VLA-NEXT: vmv1r.v v22, v13
213 ; VLA-NEXT: vmv1r.v v16, v12
214 ; VLA-NEXT: vmv1r.v v14, v11
215 ; VLA-NEXT: vmv1r.v v12, v10
216 ; VLA-NEXT: vmv1r.v v10, v9
217 ; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
218 ; VLA-NEXT: vslideup.vi v20, v18, 4
219 ; VLA-NEXT: vslideup.vi v16, v22, 4
220 ; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma
221 ; VLA-NEXT: vslideup.vi v16, v20, 8
222 ; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
223 ; VLA-NEXT: vslideup.vi v12, v14, 4
224 ; VLA-NEXT: vslideup.vi v8, v10, 4
225 ; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma
226 ; VLA-NEXT: vslideup.vi v8, v12, 8
227 ; VLA-NEXT: li a0, 32
228 ; VLA-NEXT: vsetvli zero, a0, e32, m8, ta, ma
229 ; VLA-NEXT: vslideup.vi v8, v16, 16
232 ; VLS-LABEL: concat_8xv4i32:
235 %ab = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
236 %cd = shufflevector <4 x i32> %c, <4 x i32> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
237 %abcd = shufflevector <8 x i32> %ab, <8 x i32> %cd, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
238 %ef = shufflevector <4 x i32> %e, <4 x i32> %f, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
239 %gh = shufflevector <4 x i32> %g, <4 x i32> %h, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
240 %efgh = shufflevector <8 x i32> %ef, <8 x i32> %gh, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
241 %abcdefgh = shufflevector <16 x i32> %abcd, <16 x i32> %efgh, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
242 ret <32 x i32> %abcdefgh