1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3 ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
5 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; trn1 on <8 x i8>: the mask <0, 8, 2, 10, ...> interleaves the even-indexed
; elements of the two inputs, giving <v0[0], v1[0], v0[2], v1[2], ...> (mask
; indices >= 8 select from %v1). Expected code: an unmasked vrgather.vv indexed
; by vid.v, then a vrgather.vv indexed by vid.v - 1 under mask v0 = 170 (0xAA,
; i.e. the odd result lanes).
7 define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
8 ; CHECK-LABEL: trn1.v8i8:
10 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
11 ; CHECK-NEXT: vid.v v11
12 ; CHECK-NEXT: vrgather.vv v10, v8, v11
13 ; CHECK-NEXT: li a0, 170
14 ; CHECK-NEXT: vmv.s.x v0, a0
15 ; CHECK-NEXT: vadd.vi v8, v11, -1
16 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
17 ; CHECK-NEXT: vmv1r.v v8, v10
19 %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; trn2 on <8 x i8>: the mask <1, 9, 3, 11, ...> interleaves the odd-indexed
; elements of the two inputs, giving <v0[1], v1[1], v0[3], v1[3], ...> (mask
; indices >= 8 select from %v1). Expected code: an unmasked vrgather.vv indexed
; by vid.v + 1, then a vrgather.vv indexed by vid.v under mask v0 = 170 (0xAA,
; i.e. the odd result lanes).
23 define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
24 ; CHECK-LABEL: trn2.v8i8:
26 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
27 ; CHECK-NEXT: vid.v v11
28 ; CHECK-NEXT: vadd.vi v12, v11, 1
29 ; CHECK-NEXT: li a0, 170
30 ; CHECK-NEXT: vmv.s.x v0, a0
31 ; CHECK-NEXT: vrgather.vv v10, v8, v12
32 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t
33 ; CHECK-NEXT: vmv1r.v v8, v10
35 %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
; trn1 on <16 x i8>: the mask <0, 16, 2, 18, ...> interleaves the even-indexed
; elements of the two inputs, giving <v0[0], v1[0], v0[2], v1[2], ...> (mask
; indices >= 16 select from %v1). The 16-lane mask does not fit an 8-bit
; immediate, so the expected code builds (11 << 12) - 1366 = 43690 (0xAAAA,
; the odd result lanes) with lui/addi and writes it to v0 under e16 before the
; masked vrgather.vv indexed by vid.v - 1.
39 define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
40 ; CHECK-LABEL: trn1.v16i8:
42 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
43 ; CHECK-NEXT: vid.v v11
44 ; CHECK-NEXT: vrgather.vv v10, v8, v11
45 ; CHECK-NEXT: vadd.vi v8, v11, -1
46 ; CHECK-NEXT: lui a0, 11
47 ; CHECK-NEXT: addi a0, a0, -1366
48 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
49 ; CHECK-NEXT: vmv.s.x v0, a0
50 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
51 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
52 ; CHECK-NEXT: vmv.v.v v8, v10
54 %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
; trn2 on <16 x i8>: the mask <1, 17, 3, 19, ...> interleaves the odd-indexed
; elements of the two inputs, giving <v0[1], v1[1], v0[3], v1[3], ...> (mask
; indices >= 16 select from %v1). Expected code: an unmasked vrgather.vv
; indexed by vid.v + 1, then a vrgather.vv indexed by vid.v under a v0 mask of
; (11 << 12) - 1366 = 43690 (0xAAAA, the odd result lanes) that is built with
; lui/addi and written under e16.
58 define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
59 ; CHECK-LABEL: trn2.v16i8:
61 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
62 ; CHECK-NEXT: vid.v v11
63 ; CHECK-NEXT: vadd.vi v12, v11, 1
64 ; CHECK-NEXT: vrgather.vv v10, v8, v12
65 ; CHECK-NEXT: lui a0, 11
66 ; CHECK-NEXT: addi a0, a0, -1366
67 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
68 ; CHECK-NEXT: vmv.s.x v0, a0
69 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
70 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t
71 ; CHECK-NEXT: vmv.v.v v8, v10
73 %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
; trn1 on <4 x i16>: the mask <0, 4, 2, 6> yields <v0[0], v1[0], v0[2], v1[2]>
; (mask indices >= 4 select from %v1). Expected code: an unmasked vrgather.vv
; indexed by vid.v, then a vrgather.vv indexed by vid.v - 1 under mask
; v0 = 10 (0b1010, result lanes 1 and 3), set with vmv.v.i.
77 define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
78 ; CHECK-LABEL: trn1.v4i16:
80 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
81 ; CHECK-NEXT: vid.v v11
82 ; CHECK-NEXT: vrgather.vv v10, v8, v11
83 ; CHECK-NEXT: vmv.v.i v0, 10
84 ; CHECK-NEXT: vadd.vi v8, v11, -1
85 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
86 ; CHECK-NEXT: vmv1r.v v8, v10
88 %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; trn2 on <4 x i16>: the mask <1, 5, 3, 7> yields <v0[1], v1[1], v0[3], v1[3]>
; (mask indices >= 4 select from %v1). Expected code: an unmasked vrgather.vv
; indexed by vid.v + 1, then a vrgather.vv indexed by vid.v under mask
; v0 = 10 (0b1010, result lanes 1 and 3), set with vmv.v.i.
92 define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
93 ; CHECK-LABEL: trn2.v4i16:
95 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
96 ; CHECK-NEXT: vid.v v11
97 ; CHECK-NEXT: vadd.vi v12, v11, 1
98 ; CHECK-NEXT: vmv.v.i v0, 10
99 ; CHECK-NEXT: vrgather.vv v10, v8, v12
100 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t
101 ; CHECK-NEXT: vmv1r.v v8, v10
103 %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; trn1 on <8 x i16>: the mask <0, 8, 2, 10, ...> interleaves the even-indexed
; elements of the two inputs, giving <v0[0], v1[0], v0[2], v1[2], ...> (mask
; indices >= 8 select from %v1). Expected code: an unmasked vrgather.vv indexed
; by vid.v, then a vrgather.vv indexed by vid.v - 1 under mask v0 = 170 (0xAA,
; i.e. the odd result lanes).
107 define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
108 ; CHECK-LABEL: trn1.v8i16:
110 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
111 ; CHECK-NEXT: vid.v v11
112 ; CHECK-NEXT: vrgather.vv v10, v8, v11
113 ; CHECK-NEXT: li a0, 170
114 ; CHECK-NEXT: vmv.s.x v0, a0
115 ; CHECK-NEXT: vadd.vi v8, v11, -1
116 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
117 ; CHECK-NEXT: vmv.v.v v8, v10
119 %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; trn2 on <8 x i16>: the mask <1, 9, 3, 11, ...> interleaves the odd-indexed
; elements of the two inputs, giving <v0[1], v1[1], v0[3], v1[3], ...> (mask
; indices >= 8 select from %v1). Expected code: an unmasked vrgather.vv indexed
; by vid.v + 1, then a vrgather.vv indexed by vid.v under mask v0 = 170 (0xAA,
; i.e. the odd result lanes).
123 define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
124 ; CHECK-LABEL: trn2.v8i16:
126 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
127 ; CHECK-NEXT: vid.v v11
128 ; CHECK-NEXT: vadd.vi v12, v11, 1
129 ; CHECK-NEXT: li a0, 170
130 ; CHECK-NEXT: vmv.s.x v0, a0
131 ; CHECK-NEXT: vrgather.vv v10, v8, v12
132 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t
133 ; CHECK-NEXT: vmv.v.v v8, v10
135 %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
; trn1 on <2 x i32>: the mask <0, 2> yields <v0[0], v1[0]> (mask index 2 is
; element 0 of %v1). With only two lanes the expected code is a single
; vslideup.vi by 1 rather than a gather sequence.
139 define <2 x i32> @trn1.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
140 ; CHECK-LABEL: trn1.v2i32:
142 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
143 ; CHECK-NEXT: vslideup.vi v8, v9, 1
145 %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 2>
; trn2 on <2 x i32>: the mask <1, 3> yields <v0[1], v1[1]> (mask index 3 is
; element 1 of %v1). Expected code: two vrgather.vi with immediate index 1,
; the second one under mask v0 = 2 (0b10, result lane 1 only).
149 define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
150 ; CHECK-LABEL: trn2.v2i32:
152 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
153 ; CHECK-NEXT: vmv.v.i v0, 2
154 ; CHECK-NEXT: vrgather.vi v10, v8, 1
155 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
156 ; CHECK-NEXT: vmv1r.v v8, v10
158 %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 1, i32 3>
; trn1 on <4 x i32>: the mask <0, 4, 2, 6> yields <v0[0], v1[0], v0[2], v1[2]>
; (mask indices >= 4 select from %v1). Expected code: an unmasked vrgather.vv
; indexed by vid.v, then a vrgather.vv indexed by vid.v - 1 under mask
; v0 = 10 (0b1010, result lanes 1 and 3), set with vmv.v.i.
162 define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
163 ; CHECK-LABEL: trn1.v4i32:
165 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
166 ; CHECK-NEXT: vid.v v11
167 ; CHECK-NEXT: vrgather.vv v10, v8, v11
168 ; CHECK-NEXT: vmv.v.i v0, 10
169 ; CHECK-NEXT: vadd.vi v8, v11, -1
170 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
171 ; CHECK-NEXT: vmv.v.v v8, v10
173 %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; trn2 on <4 x i32>: the mask <1, 5, 3, 7> yields <v0[1], v1[1], v0[3], v1[3]>
; (mask indices >= 4 select from %v1). Expected code: an unmasked vrgather.vv
; indexed by vid.v + 1, then a vrgather.vv indexed by vid.v under mask
; v0 = 10 (0b1010, result lanes 1 and 3), set with vmv.v.i.
177 define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
178 ; CHECK-LABEL: trn2.v4i32:
180 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
181 ; CHECK-NEXT: vid.v v11
182 ; CHECK-NEXT: vadd.vi v12, v11, 1
183 ; CHECK-NEXT: vmv.v.i v0, 10
184 ; CHECK-NEXT: vrgather.vv v10, v8, v12
185 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t
186 ; CHECK-NEXT: vmv.v.v v8, v10
188 %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; trn1 on <2 x i64>: the mask <0, 2> yields <v0[0], v1[0]> (mask index 2 is
; element 0 of %v1). With only two lanes the expected code is a single
; vslideup.vi by 1 rather than a gather sequence.
192 define <2 x i64> @trn1.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
193 ; CHECK-LABEL: trn1.v2i64:
195 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
196 ; CHECK-NEXT: vslideup.vi v8, v9, 1
198 %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 2>
; trn2 on <2 x i64>: the mask <1, 3> yields <v0[1], v1[1]> (mask index 3 is
; element 1 of %v1). Expected code: two vrgather.vi with immediate index 1,
; the second one under mask v0 = 2 (0b10, result lane 1 only).
202 define <2 x i64> @trn2.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
203 ; CHECK-LABEL: trn2.v2i64:
205 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
206 ; CHECK-NEXT: vmv.v.i v0, 2
207 ; CHECK-NEXT: vrgather.vi v10, v8, 1
208 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
209 ; CHECK-NEXT: vmv.v.v v8, v10
211 %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 1, i32 3>
; trn1 on <2 x float>: the mask <0, 2> yields <v0[0], v1[0]> (mask index 2 is
; element 0 of %v1). The expected code is the same single vslideup.vi by 1 as
; for the <2 x i32> variant.
215 define <2 x float> @trn1.v2f32(<2 x float> %v0, <2 x float> %v1) {
216 ; CHECK-LABEL: trn1.v2f32:
218 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
219 ; CHECK-NEXT: vslideup.vi v8, v9, 1
221 %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 2>
222 ret <2 x float> %tmp0
; trn2 on <2 x float>: the mask <1, 3> yields <v0[1], v1[1]> (mask index 3 is
; element 1 of %v1). Expected code: two vrgather.vi with immediate index 1,
; the second one under mask v0 = 2 (0b10, result lane 1 only).
225 define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) {
226 ; CHECK-LABEL: trn2.v2f32:
228 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
229 ; CHECK-NEXT: vmv.v.i v0, 2
230 ; CHECK-NEXT: vrgather.vi v10, v8, 1
231 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
232 ; CHECK-NEXT: vmv1r.v v8, v10
234 %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 1, i32 3>
235 ret <2 x float> %tmp0
; trn1 on <4 x float>: the mask <0, 4, 2, 6> yields
; <v0[0], v1[0], v0[2], v1[2]> (mask indices >= 4 select from %v1). Expected
; code: an unmasked vrgather.vv indexed by vid.v, then a vrgather.vv indexed by
; vid.v - 1 under mask v0 = 10 (0b1010, result lanes 1 and 3).
238 define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) {
239 ; CHECK-LABEL: trn1.v4f32:
241 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
242 ; CHECK-NEXT: vid.v v11
243 ; CHECK-NEXT: vrgather.vv v10, v8, v11
244 ; CHECK-NEXT: vmv.v.i v0, 10
245 ; CHECK-NEXT: vadd.vi v8, v11, -1
246 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
247 ; CHECK-NEXT: vmv.v.v v8, v10
249 %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
250 ret <4 x float> %tmp0
; trn2 on <4 x float>: the mask <1, 5, 3, 7> yields
; <v0[1], v1[1], v0[3], v1[3]> (mask indices >= 4 select from %v1). Expected
; code: an unmasked vrgather.vv indexed by vid.v + 1, then a vrgather.vv
; indexed by vid.v under mask v0 = 10 (0b1010, result lanes 1 and 3).
253 define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) {
254 ; CHECK-LABEL: trn2.v4f32:
256 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
257 ; CHECK-NEXT: vid.v v11
258 ; CHECK-NEXT: vadd.vi v12, v11, 1
259 ; CHECK-NEXT: vmv.v.i v0, 10
260 ; CHECK-NEXT: vrgather.vv v10, v8, v12
261 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t
262 ; CHECK-NEXT: vmv.v.v v8, v10
264 %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
265 ret <4 x float> %tmp0
; trn1 on <2 x double>: the mask <0, 2> yields <v0[0], v1[0]> (mask index 2 is
; element 0 of %v1). The expected code is the same single vslideup.vi by 1 as
; for the <2 x i64> variant.
268 define <2 x double> @trn1.v2f64(<2 x double> %v0, <2 x double> %v1) {
269 ; CHECK-LABEL: trn1.v2f64:
271 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
272 ; CHECK-NEXT: vslideup.vi v8, v9, 1
274 %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 2>
275 ret <2 x double> %tmp0
; trn2 on <2 x double>: the mask <1, 3> yields <v0[1], v1[1]> (mask index 3 is
; element 1 of %v1). Expected code: two vrgather.vi with immediate index 1,
; the second one under mask v0 = 2 (0b10, result lane 1 only).
278 define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) {
279 ; CHECK-LABEL: trn2.v2f64:
281 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
282 ; CHECK-NEXT: vmv.v.i v0, 2
283 ; CHECK-NEXT: vrgather.vi v10, v8, 1
284 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
285 ; CHECK-NEXT: vmv.v.v v8, v10
287 %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 1, i32 3>
288 ret <2 x double> %tmp0
; trn1 on <4 x half>: the mask <0, 4, 2, 6> yields
; <v0[0], v1[0], v0[2], v1[2]> (mask indices >= 4 select from %v1). Expected
; code: an unmasked vrgather.vv indexed by vid.v, then a vrgather.vv indexed by
; vid.v - 1 under mask v0 = 10 (0b1010, result lanes 1 and 3).
291 define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) {
292 ; CHECK-LABEL: trn1.v4f16:
294 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
295 ; CHECK-NEXT: vid.v v11
296 ; CHECK-NEXT: vrgather.vv v10, v8, v11
297 ; CHECK-NEXT: vmv.v.i v0, 10
298 ; CHECK-NEXT: vadd.vi v8, v11, -1
299 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
300 ; CHECK-NEXT: vmv1r.v v8, v10
302 %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; trn2 on <4 x half>: the mask <1, 5, 3, 7> yields
; <v0[1], v1[1], v0[3], v1[3]> (mask indices >= 4 select from %v1). Expected
; code: an unmasked vrgather.vv indexed by vid.v + 1, then a vrgather.vv
; indexed by vid.v under mask v0 = 10 (0b1010, result lanes 1 and 3).
306 define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) {
307 ; CHECK-LABEL: trn2.v4f16:
309 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
310 ; CHECK-NEXT: vid.v v11
311 ; CHECK-NEXT: vadd.vi v12, v11, 1
312 ; CHECK-NEXT: vmv.v.i v0, 10
313 ; CHECK-NEXT: vrgather.vv v10, v8, v12
314 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t
315 ; CHECK-NEXT: vmv1r.v v8, v10
317 %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; trn1 on <8 x half>: the mask <0, 8, 2, 10, ...> interleaves the even-indexed
; elements of the two inputs, giving <v0[0], v1[0], v0[2], v1[2], ...> (mask
; indices >= 8 select from %v1). Expected code: an unmasked vrgather.vv indexed
; by vid.v, then a vrgather.vv indexed by vid.v - 1 under mask v0 = 170 (0xAA,
; i.e. the odd result lanes).
321 define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) {
322 ; CHECK-LABEL: trn1.v8f16:
324 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
325 ; CHECK-NEXT: vid.v v11
326 ; CHECK-NEXT: vrgather.vv v10, v8, v11
327 ; CHECK-NEXT: li a0, 170
328 ; CHECK-NEXT: vmv.s.x v0, a0
329 ; CHECK-NEXT: vadd.vi v8, v11, -1
330 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
331 ; CHECK-NEXT: vmv.v.v v8, v10
333 %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; trn2 on <8 x half>: the mask <1, 9, 3, 11, ...> interleaves the odd-indexed
; elements of the two inputs, giving <v0[1], v1[1], v0[3], v1[3], ...> (mask
; indices >= 8 select from %v1). Expected code: an unmasked vrgather.vv indexed
; by vid.v + 1, then a vrgather.vv indexed by vid.v under mask v0 = 170 (0xAA,
; i.e. the odd result lanes).
337 define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) {
338 ; CHECK-LABEL: trn2.v8f16:
340 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
341 ; CHECK-NEXT: vid.v v11
342 ; CHECK-NEXT: vadd.vi v12, v11, 1
343 ; CHECK-NEXT: li a0, 170
344 ; CHECK-NEXT: vmv.s.x v0, a0
345 ; CHECK-NEXT: vrgather.vv v10, v8, v12
346 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t
347 ; CHECK-NEXT: vmv.v.v v8, v10
349 %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
352 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: