; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

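; Each test loads two <2 x iN> vectors, concatenates them with a shufflevector,
; inserts the scalar %x at a fixed lane, and stores the <4 x iN> result back to %a.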
define void @v4xi8_concat_vector_insert_idx0(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 2
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 0
  store <4 x i8> %ins, ptr %a
  ret void
}

define void @v4xi8_concat_vector_insert_idx1(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vslideup.vi v8, v10, 1
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 2
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 1
  store <4 x i8> %ins, ptr %a
  ret void
}

define void @v4xi8_concat_vector_insert_idx2(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v9, v8, 2
; CHECK-NEXT:    vse8.v v9, (a0)
; CHECK-NEXT:    ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 2
  store <4 x i8> %ins, ptr %a
  ret void
}

define void @v4xi8_concat_vector_insert_idx3(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vslideup.vi v8, v10, 1
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v9, v8, 2
; CHECK-NEXT:    vse8.v v9, (a0)
; CHECK-NEXT:    ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 3
  store <4 x i8> %ins, ptr %a
  ret void
}

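; For the <4 x i64> cases below, RV32 must assemble the i64 scalar from its two
; GPR halves (a2/a3) with vslide1down.vx, while RV64 can insert it with a single
; vmv.s.x, so the two targets get separate check prefixes.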
define void @v4xi64_concat_vector_insert_idx0(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx0:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vle64.v v10, (a1)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v8, v10, 2
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx0:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vle64.v v10, (a1)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; RV64-NEXT:    vmv.s.x v8, a2
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vslideup.vi v8, v10, 2
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 0
  store <4 x i64> %ins, ptr %a
  ret void
}

define void @v4xi64_concat_vector_insert_idx1(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx1:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vle64.v v10, (a1)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v9, v8, a2
; RV32-NEXT:    vslide1down.vx v9, v9, a3
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vslideup.vi v8, v9, 1
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v8, v10, 2
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx1:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vle64.v v10, (a1)
; RV64-NEXT:    vmv.s.x v9, a2
; RV64-NEXT:    vslideup.vi v8, v9, 1
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vslideup.vi v8, v10, 2
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 1
  store <4 x i64> %ins, ptr %a
  ret void
}

define void @v4xi64_concat_vector_insert_idx2(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx2:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a1)
; RV32-NEXT:    vle64.v v10, (a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v10, v8, 2
; RV32-NEXT:    vse64.v v10, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx2:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a1)
; RV64-NEXT:    vle64.v v10, (a0)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; RV64-NEXT:    vmv.s.x v8, a2
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vslideup.vi v10, v8, 2
; RV64-NEXT:    vse64.v v10, (a0)
; RV64-NEXT:    ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 2
  store <4 x i64> %ins, ptr %a
  ret void
}

define void @v4xi64_concat_vector_insert_idx3(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx3:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vle64.v v10, (a1)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v9, v8, a2
; RV32-NEXT:    vslide1down.vx v9, v9, a3
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vslideup.vi v10, v9, 1
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v8, v10, 2
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx3:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a1)
; RV64-NEXT:    vle64.v v10, (a0)
; RV64-NEXT:    vmv.s.x v9, a2
; RV64-NEXT:    vslideup.vi v8, v9, 1
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vslideup.vi v10, v8, 2
; RV64-NEXT:    vse64.v v10, (a0)
; RV64-NEXT:    ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 3
  store <4 x i64> %ins, ptr %a
  ret void
}