1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V
4 ; RUN: llc -mtriple=riscv64 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32
; NOTE(review): A constant 0..15 step-1 sequence should lower to a single
; vid.v (no constant-pool load); the second function checks that undef
; lanes (elements 3 and 5) do not defeat the vid.v match.
6 define void @buildvec_vid_v16i8(ptr %x) {
7 ; CHECK-LABEL: buildvec_vid_v16i8:
9 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
10 ; CHECK-NEXT: vid.v v8
11 ; CHECK-NEXT: vse8.v v8, (a0)
13 store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, ptr %x
17 define void @buildvec_vid_undefelts_v16i8(ptr %x) {
18 ; CHECK-LABEL: buildvec_vid_undefelts_v16i8:
20 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
21 ; CHECK-NEXT: vid.v v8
22 ; CHECK-NEXT: vse8.v v8, (a0)
24 store <16 x i8> <i8 0, i8 1, i8 2, i8 undef, i8 4, i8 undef, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, ptr %x
28 ; TODO: Could do VID then insertelement on missing elements
; NOTE(review): element 2 is 3 (not 2), so this is not a vid sequence;
; current lowering falls back to a constant-pool load (.LCPI2_0).
29 define void @buildvec_notquite_vid_v16i8(ptr %x) {
30 ; CHECK-LABEL: buildvec_notquite_vid_v16i8:
32 ; CHECK-NEXT: lui a1, %hi(.LCPI2_0)
33 ; CHECK-NEXT: addi a1, a1, %lo(.LCPI2_0)
34 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
35 ; CHECK-NEXT: vle8.v v8, (a1)
36 ; CHECK-NEXT: vse8.v v8, (a0)
38 store <16 x i8> <i8 0, i8 1, i8 3, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, ptr %x
; NOTE(review): vid plus a small offset folds into vadd.vi; an offset that
; does not fit a 5-bit immediate (base 100) falls back to a constant-pool
; load; a constant stride of 3 lowers as vid.v followed by vmul.vx.
42 define void @buildvec_vid_plus_imm_v16i8(ptr %x) {
43 ; CHECK-LABEL: buildvec_vid_plus_imm_v16i8:
45 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
46 ; CHECK-NEXT: vid.v v8
47 ; CHECK-NEXT: vadd.vi v8, v8, 2
48 ; CHECK-NEXT: vse8.v v8, (a0)
50 store <16 x i8> <i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17>, ptr %x
54 define void @buildvec_vid_plus_nonimm_v16i8(ptr %x) {
55 ; CHECK-LABEL: buildvec_vid_plus_nonimm_v16i8:
57 ; CHECK-NEXT: lui a1, %hi(.LCPI4_0)
58 ; CHECK-NEXT: addi a1, a1, %lo(.LCPI4_0)
59 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
60 ; CHECK-NEXT: vle8.v v8, (a1)
61 ; CHECK-NEXT: vse8.v v8, (a0)
63 store <16 x i8> <i8 100, i8 101, i8 102, i8 103, i8 104, i8 105, i8 106, i8 107, i8 108, i8 109, i8 110, i8 111, i8 112, i8 113, i8 114, i8 115>, ptr %x
67 define void @buildvec_vid_mpy_imm_v16i8(ptr %x) {
68 ; CHECK-LABEL: buildvec_vid_mpy_imm_v16i8:
70 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
71 ; CHECK-NEXT: vid.v v8
72 ; CHECK-NEXT: li a1, 3
73 ; CHECK-NEXT: vmul.vx v8, v8, a1
74 ; CHECK-NEXT: vse8.v v8, (a0)
76 store <16 x i8> <i8 0, i8 3, i8 6, i8 9, i8 12, i8 15, i8 18, i8 21, i8 24, i8 27, i8 30, i8 33, i8 36, i8 39, i8 42, i8 45>, ptr %x
; NOTE(review): step-2 sequence <0,2,4,6> lowers as vid.v doubled via
; vadd.vv; the _undefN variants check that undef lanes in various
; positions still produce identical code.
80 define <4 x i8> @buildvec_vid_step2_add0_v4i8() {
81 ; CHECK-LABEL: buildvec_vid_step2_add0_v4i8:
83 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
84 ; CHECK-NEXT: vid.v v8
85 ; CHECK-NEXT: vadd.vv v8, v8, v8
87 ret <4 x i8> <i8 0, i8 2, i8 4, i8 6>
90 define <4 x i8> @buildvec_vid_step2_add0_v4i8_undef0() {
91 ; CHECK-LABEL: buildvec_vid_step2_add0_v4i8_undef0:
93 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
94 ; CHECK-NEXT: vid.v v8
95 ; CHECK-NEXT: vadd.vv v8, v8, v8
97 ret <4 x i8> <i8 undef, i8 2, i8 4, i8 6>
100 define <4 x i8> @buildvec_vid_step2_add0_v4i8_undef1() {
101 ; CHECK-LABEL: buildvec_vid_step2_add0_v4i8_undef1:
103 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
104 ; CHECK-NEXT: vid.v v8
105 ; CHECK-NEXT: vadd.vv v8, v8, v8
107 ret <4 x i8> <i8 undef, i8 undef, i8 4, i8 6>
110 define <4 x i8> @buildvec_vid_step2_add0_v4i8_undef2() {
111 ; CHECK-LABEL: buildvec_vid_step2_add0_v4i8_undef2:
113 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
114 ; CHECK-NEXT: vid.v v8
115 ; CHECK-NEXT: vadd.vv v8, v8, v8
117 ret <4 x i8> <i8 0, i8 undef, i8 undef, i8 6>
; NOTE(review): step-2 plus 1 (<1,3,5,7>) = (vid + vid) + 1: vadd.vv then
; vadd.vi 1; undef-lane variants must match the same pattern.
120 define <4 x i8> @buildvec_vid_step2_add1_v4i8() {
121 ; CHECK-LABEL: buildvec_vid_step2_add1_v4i8:
123 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
124 ; CHECK-NEXT: vid.v v8
125 ; CHECK-NEXT: vadd.vv v8, v8, v8
126 ; CHECK-NEXT: vadd.vi v8, v8, 1
128 ret <4 x i8> <i8 1, i8 3, i8 5, i8 7>
131 define <4 x i8> @buildvec_vid_step2_add1_v4i8_undef0() {
132 ; CHECK-LABEL: buildvec_vid_step2_add1_v4i8_undef0:
134 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
135 ; CHECK-NEXT: vid.v v8
136 ; CHECK-NEXT: vadd.vv v8, v8, v8
137 ; CHECK-NEXT: vadd.vi v8, v8, 1
139 ret <4 x i8> <i8 undef, i8 3, i8 5, i8 7>
142 define <4 x i8> @buildvec_vid_step2_add1_v4i8_undef1() {
143 ; CHECK-LABEL: buildvec_vid_step2_add1_v4i8_undef1:
145 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
146 ; CHECK-NEXT: vid.v v8
147 ; CHECK-NEXT: vadd.vv v8, v8, v8
148 ; CHECK-NEXT: vadd.vi v8, v8, 1
150 ret <4 x i8> <i8 undef, i8 undef, i8 5, i8 7>
153 define <4 x i8> @buildvec_vid_step2_add1_v4i8_undef2() {
154 ; CHECK-LABEL: buildvec_vid_step2_add1_v4i8_undef2:
156 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
157 ; CHECK-NEXT: vid.v v8
158 ; CHECK-NEXT: vadd.vv v8, v8, v8
159 ; CHECK-NEXT: vadd.vi v8, v8, 1
161 ret <4 x i8> <i8 1, i8 undef, i8 undef, i8 7>
; NOTE(review): step -1 sequence <0,-1,-2,-3> is vid negated, emitted as
; vrsub.vi v8, v8, 0 (0 - vid); undef-lane variants match identically.
164 define <4 x i8> @buildvec_vid_stepn1_add0_v4i8() {
165 ; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8:
167 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
168 ; CHECK-NEXT: vid.v v8
169 ; CHECK-NEXT: vrsub.vi v8, v8, 0
171 ret <4 x i8> <i8 0, i8 -1, i8 -2, i8 -3>
174 define <4 x i8> @buildvec_vid_stepn1_add0_v4i8_undef0() {
175 ; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8_undef0:
177 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
178 ; CHECK-NEXT: vid.v v8
179 ; CHECK-NEXT: vrsub.vi v8, v8, 0
181 ret <4 x i8> <i8 undef, i8 -1, i8 -2, i8 -3>
184 define <4 x i8> @buildvec_vid_stepn1_add0_v4i8_undef1() {
185 ; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8_undef1:
187 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
188 ; CHECK-NEXT: vid.v v8
189 ; CHECK-NEXT: vrsub.vi v8, v8, 0
191 ret <4 x i8> <i8 undef, i8 undef, i8 -2, i8 -3>
194 define <4 x i8> @buildvec_vid_stepn1_add0_v4i8_undef2() {
195 ; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8_undef2:
197 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
198 ; CHECK-NEXT: vid.v v8
199 ; CHECK-NEXT: vrsub.vi v8, v8, 0
201 ret <4 x i8> <i8 0, i8 undef, i8 undef, i8 -3>
; NOTE(review): step -2 sequence <0,-2,-4,-6> = -(vid*2): vadd.vv to
; double, then vrsub.vi 0 to negate. In the _undef2 variant only lane 3
; (-6) is defined, so the whole vector becomes a splat (vmv.v.i -6).
204 define <4 x i8> @buildvec_vid_stepn2_add0_v4i8() {
205 ; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8:
207 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
208 ; CHECK-NEXT: vid.v v8
209 ; CHECK-NEXT: vadd.vv v8, v8, v8
210 ; CHECK-NEXT: vrsub.vi v8, v8, 0
212 ret <4 x i8> <i8 0, i8 -2, i8 -4, i8 -6>
215 define <4 x i8> @buildvec_vid_stepn2_add0_v4i8_undef0() {
216 ; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8_undef0:
218 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
219 ; CHECK-NEXT: vid.v v8
220 ; CHECK-NEXT: vadd.vv v8, v8, v8
221 ; CHECK-NEXT: vrsub.vi v8, v8, 0
223 ret <4 x i8> <i8 undef, i8 -2, i8 -4, i8 -6>
226 define <4 x i8> @buildvec_vid_stepn2_add0_v4i8_undef1() {
227 ; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8_undef1:
229 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
230 ; CHECK-NEXT: vid.v v8
231 ; CHECK-NEXT: vadd.vv v8, v8, v8
232 ; CHECK-NEXT: vrsub.vi v8, v8, 0
234 ret <4 x i8> <i8 undef, i8 undef, i8 -4, i8 -6>
237 define <4 x i8> @buildvec_vid_stepn2_add0_v4i8_undef2() {
238 ; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8_undef2:
240 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
241 ; CHECK-NEXT: vmv.v.i v8, -6
243 ret <4 x i8> <i8 undef, i8 undef, i8 undef, i8 -6>
; NOTE(review): <3,1,-1,-3> = 3 - vid*2: vadd.vv then vrsub.vi 3.
; For step -3 the multiply can't be folded, so lowering uses
; vmadd.vx with 253 (-3 as an unsigned byte) and a splat-3 addend.
246 define <4 x i8> @buildvec_vid_stepn2_add3_v4i8() {
247 ; CHECK-LABEL: buildvec_vid_stepn2_add3_v4i8:
249 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
250 ; CHECK-NEXT: vid.v v8
251 ; CHECK-NEXT: vadd.vv v8, v8, v8
252 ; CHECK-NEXT: vrsub.vi v8, v8, 3
254 ret <4 x i8> <i8 3, i8 1, i8 -1, i8 -3>
257 define <4 x i8> @buildvec_vid_stepn3_add3_v4i8() {
258 ; CHECK-LABEL: buildvec_vid_stepn3_add3_v4i8:
260 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
261 ; CHECK-NEXT: vmv.v.i v9, 3
262 ; CHECK-NEXT: vid.v v8
263 ; CHECK-NEXT: li a0, 253
264 ; CHECK-NEXT: vmadd.vx v8, a0, v9
266 ret <4 x i8> <i8 3, i8 0, i8 -3, i8 -6>
; NOTE(review): vid * -3 + -3 via vmadd.vx; the four stores (full and
; undef-lane variants) must all reuse the single materialized vector v9.
269 define void @buildvec_vid_stepn3_addn3_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
270 ; CHECK-LABEL: buildvec_vid_stepn3_addn3_v4i32:
272 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
273 ; CHECK-NEXT: vmv.v.i v8, -3
274 ; CHECK-NEXT: vid.v v9
275 ; CHECK-NEXT: li a4, -3
276 ; CHECK-NEXT: vmadd.vx v9, a4, v8
277 ; CHECK-NEXT: vse32.v v9, (a0)
278 ; CHECK-NEXT: vse32.v v9, (a1)
279 ; CHECK-NEXT: vse32.v v9, (a2)
280 ; CHECK-NEXT: vse32.v v9, (a3)
282 store <4 x i32> <i32 -3, i32 -6, i32 -9, i32 -12>, ptr %z0
283 store <4 x i32> <i32 undef, i32 -6, i32 -9, i32 -12>, ptr %z1
284 store <4 x i32> <i32 undef, i32 undef, i32 -9, i32 -12>, ptr %z2
285 store <4 x i32> <i32 -3, i32 undef, i32 undef, i32 -12>, ptr %z3
289 ; FIXME: RV32 doesn't catch this pattern due to BUILD_VECTOR legalization.
; NOTE(review): RV64V gets the ideal single vid.v at e64. RV32 falls back
; to a constant-pool i8 load widened with vsext.vf4. RV64ZVE32 has no
; 64-bit vector elements, so the result is returned via scalar sd stores
; through the sret-style pointer in a0.
290 define <4 x i64> @buildvec_vid_step1_add0_v4i64() {
291 ; RV32-LABEL: buildvec_vid_step1_add0_v4i64:
293 ; RV32-NEXT: lui a0, %hi(.LCPI25_0)
294 ; RV32-NEXT: addi a0, a0, %lo(.LCPI25_0)
295 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
296 ; RV32-NEXT: vle8.v v10, (a0)
297 ; RV32-NEXT: vsext.vf4 v8, v10
300 ; RV64V-LABEL: buildvec_vid_step1_add0_v4i64:
302 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
303 ; RV64V-NEXT: vid.v v8
306 ; RV64ZVE32-LABEL: buildvec_vid_step1_add0_v4i64:
307 ; RV64ZVE32: # %bb.0:
308 ; RV64ZVE32-NEXT: li a1, 3
309 ; RV64ZVE32-NEXT: sd a1, 24(a0)
310 ; RV64ZVE32-NEXT: li a1, 2
311 ; RV64ZVE32-NEXT: sd a1, 16(a0)
312 ; RV64ZVE32-NEXT: li a1, 1
313 ; RV64ZVE32-NEXT: sd a1, 8(a0)
314 ; RV64ZVE32-NEXT: sd zero, 0(a0)
315 ; RV64ZVE32-NEXT: ret
316 ret <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; NOTE(review): same i64 lowering split as above, with a step-2 sequence:
; RV64V adds vadd.vv to double vid; RV32 uses the constant pool; RV64ZVE32
; stores the scalar constants directly.
319 define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
320 ; RV32-LABEL: buildvec_vid_step2_add0_v4i64:
322 ; RV32-NEXT: lui a0, %hi(.LCPI26_0)
323 ; RV32-NEXT: addi a0, a0, %lo(.LCPI26_0)
324 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
325 ; RV32-NEXT: vle8.v v10, (a0)
326 ; RV32-NEXT: vsext.vf4 v8, v10
329 ; RV64V-LABEL: buildvec_vid_step2_add0_v4i64:
331 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
332 ; RV64V-NEXT: vid.v v8
333 ; RV64V-NEXT: vadd.vv v8, v8, v8
336 ; RV64ZVE32-LABEL: buildvec_vid_step2_add0_v4i64:
337 ; RV64ZVE32: # %bb.0:
338 ; RV64ZVE32-NEXT: li a1, 6
339 ; RV64ZVE32-NEXT: sd a1, 24(a0)
340 ; RV64ZVE32-NEXT: li a1, 4
341 ; RV64ZVE32-NEXT: sd a1, 16(a0)
342 ; RV64ZVE32-NEXT: li a1, 2
343 ; RV64ZVE32-NEXT: sd a1, 8(a0)
344 ; RV64ZVE32-NEXT: sd zero, 0(a0)
345 ; RV64ZVE32-NEXT: ret
346 ret <4 x i64> <i64 0, i64 2, i64 4, i64 6>
; NOTE(review): negative tests — sequences that are NOT affine vid
; patterns. Each 4xi8 constant is instead materialized as one packed
; 32-bit scalar moved into element 0 with vmv.s.x (or a plain splat when
; only one lane is defined, as in _v4i8_4).
349 define <4 x i8> @buildvec_no_vid_v4i8_0() {
350 ; CHECK-LABEL: buildvec_no_vid_v4i8_0:
352 ; CHECK-NEXT: lui a0, 28768
353 ; CHECK-NEXT: addi a0, a0, 769
354 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
355 ; CHECK-NEXT: vmv.s.x v8, a0
357 ret <4 x i8> <i8 1, i8 3, i8 6, i8 7>
360 define <4 x i8> @buildvec_no_vid_v4i8_1() {
361 ; CHECK-LABEL: buildvec_no_vid_v4i8_1:
363 ; CHECK-NEXT: lui a0, 28752
364 ; CHECK-NEXT: addi a0, a0, 512
365 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
366 ; CHECK-NEXT: vmv.s.x v8, a0
368 ret <4 x i8> <i8 undef, i8 2, i8 5, i8 7>
371 define <4 x i8> @buildvec_no_vid_v4i8_2() {
372 ; CHECK-LABEL: buildvec_no_vid_v4i8_2:
374 ; CHECK-NEXT: lui a0, 32768
375 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
376 ; CHECK-NEXT: vmv.s.x v8, a0
378 ret <4 x i8> <i8 0, i8 undef, i8 undef, i8 8>
381 define <4 x i8> @buildvec_no_vid_v4i8_3() {
382 ; CHECK-LABEL: buildvec_no_vid_v4i8_3:
384 ; CHECK-NEXT: lui a0, 28672
385 ; CHECK-NEXT: addi a0, a0, 255
386 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
387 ; CHECK-NEXT: vmv.s.x v8, a0
389 ret <4 x i8> <i8 -1, i8 undef, i8 undef, i8 7>
392 define <4 x i8> @buildvec_no_vid_v4i8_4() {
393 ; CHECK-LABEL: buildvec_no_vid_v4i8_4:
395 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
396 ; CHECK-NEXT: vmv.v.i v8, -2
398 ret <4 x i8> <i8 -2, i8 undef, i8 undef, i8 undef>
401 define <4 x i8> @buildvec_no_vid_v4i8_5() {
402 ; CHECK-LABEL: buildvec_no_vid_v4i8_5:
404 ; CHECK-NEXT: lui a0, 1032144
405 ; CHECK-NEXT: addi a0, a0, -257
406 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
407 ; CHECK-NEXT: vmv.s.x v8, a0
409 ret <4 x i8> <i8 -1, i8 -2, i8 -4, i8 -5>
; NOTE(review): dominant-element build_vector: splat the majority value 8
; and patch in the single zero at index 3 via vmv.s.x + vslideup.vi under
; a tail-undisturbed (tu) vsetivli. With only one defined lane (second
; function) the whole thing is a plain splat.
412 define void @buildvec_dominant0_v8i16(ptr %x) {
413 ; CHECK-LABEL: buildvec_dominant0_v8i16:
415 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
416 ; CHECK-NEXT: vmv.s.x v8, zero
417 ; CHECK-NEXT: vmv.v.i v9, 8
418 ; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
419 ; CHECK-NEXT: vslideup.vi v9, v8, 3
420 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
421 ; CHECK-NEXT: vse16.v v9, (a0)
423 store <8 x i16> <i16 8, i16 8, i16 undef, i16 0, i16 8, i16 undef, i16 8, i16 8>, ptr %x
427 define void @buildvec_dominant1_v8i16(ptr %x) {
428 ; CHECK-LABEL: buildvec_dominant1_v8i16:
430 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
431 ; CHECK-NEXT: vmv.v.i v8, 8
432 ; CHECK-NEXT: vse16.v v8, (a0)
434 store <8 x i16> <i16 undef, i16 8, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, ptr %x
; NOTE(review): all-undef vector needs no code at all; <undef,-1> becomes
; a splat of -1; <0,-1> is matched as a negated vid (vid + vrsub.vi 0).
; RV64ZVE32 only differs in the fractional LMUL it can use (mf4 vs mf8).
438 define <2 x i8> @buildvec_dominant0_v2i8() {
439 ; CHECK-LABEL: buildvec_dominant0_v2i8:
442 ret <2 x i8> <i8 undef, i8 undef>
445 define <2 x i8> @buildvec_dominant1_v2i8() {
446 ; RV32-LABEL: buildvec_dominant1_v2i8:
448 ; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
449 ; RV32-NEXT: vmv.v.i v8, -1
452 ; RV64V-LABEL: buildvec_dominant1_v2i8:
454 ; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
455 ; RV64V-NEXT: vmv.v.i v8, -1
458 ; RV64ZVE32-LABEL: buildvec_dominant1_v2i8:
459 ; RV64ZVE32: # %bb.0:
460 ; RV64ZVE32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
461 ; RV64ZVE32-NEXT: vmv.v.i v8, -1
462 ; RV64ZVE32-NEXT: ret
463 ret <2 x i8> <i8 undef, i8 -1>
466 define <2 x i8> @buildvec_dominant2_v2i8() {
467 ; RV32-LABEL: buildvec_dominant2_v2i8:
469 ; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
470 ; RV32-NEXT: vid.v v8
471 ; RV32-NEXT: vrsub.vi v8, v8, 0
474 ; RV64V-LABEL: buildvec_dominant2_v2i8:
476 ; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
477 ; RV64V-NEXT: vid.v v8
478 ; RV64V-NEXT: vrsub.vi v8, v8, 0
481 ; RV64ZVE32-LABEL: buildvec_dominant2_v2i8:
482 ; RV64ZVE32: # %bb.0:
483 ; RV64ZVE32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
484 ; RV64ZVE32-NEXT: vid.v v8
485 ; RV64ZVE32-NEXT: vrsub.vi v8, v8, 0
486 ; RV64ZVE32-NEXT: ret
487 ret <2 x i8> <i8 0, i8 -1>
; NOTE(review): v2i64 with a dominant -1: RV64V splats -1 and patches
; element 0 from the constant pool (tu vmv.s.x); RV32 legalizes to v4i32
; and loads the whole constant. Under optsize the RV64V patching sequence
; is replaced by a single full constant-pool vle64 (smaller code).
490 define void @buildvec_dominant0_v2i32(ptr %x) {
491 ; RV32-LABEL: buildvec_dominant0_v2i32:
493 ; RV32-NEXT: lui a1, %hi(.LCPI38_0)
494 ; RV32-NEXT: addi a1, a1, %lo(.LCPI38_0)
495 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
496 ; RV32-NEXT: vle32.v v8, (a1)
497 ; RV32-NEXT: vse32.v v8, (a0)
500 ; RV64V-LABEL: buildvec_dominant0_v2i32:
502 ; RV64V-NEXT: lui a1, %hi(.LCPI38_0)
503 ; RV64V-NEXT: ld a1, %lo(.LCPI38_0)(a1)
504 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
505 ; RV64V-NEXT: vmv.v.i v8, -1
506 ; RV64V-NEXT: vsetvli zero, zero, e64, m1, tu, ma
507 ; RV64V-NEXT: vmv.s.x v8, a1
508 ; RV64V-NEXT: vse64.v v8, (a0)
511 ; RV64ZVE32-LABEL: buildvec_dominant0_v2i32:
512 ; RV64ZVE32: # %bb.0:
513 ; RV64ZVE32-NEXT: lui a1, %hi(.LCPI38_0)
514 ; RV64ZVE32-NEXT: ld a1, %lo(.LCPI38_0)(a1)
515 ; RV64ZVE32-NEXT: li a2, -1
516 ; RV64ZVE32-NEXT: sd a2, 8(a0)
517 ; RV64ZVE32-NEXT: sd a1, 0(a0)
518 ; RV64ZVE32-NEXT: ret
519 store <2 x i64> <i64 2049638230412172402, i64 -1>, ptr %x
523 define void @buildvec_dominant1_optsize_v2i32(ptr %x) optsize {
524 ; RV32-LABEL: buildvec_dominant1_optsize_v2i32:
526 ; RV32-NEXT: lui a1, %hi(.LCPI39_0)
527 ; RV32-NEXT: addi a1, a1, %lo(.LCPI39_0)
528 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
529 ; RV32-NEXT: vle32.v v8, (a1)
530 ; RV32-NEXT: vse32.v v8, (a0)
533 ; RV64V-LABEL: buildvec_dominant1_optsize_v2i32:
535 ; RV64V-NEXT: lui a1, %hi(.LCPI39_0)
536 ; RV64V-NEXT: addi a1, a1, %lo(.LCPI39_0)
537 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
538 ; RV64V-NEXT: vle64.v v8, (a1)
539 ; RV64V-NEXT: vse64.v v8, (a0)
542 ; RV64ZVE32-LABEL: buildvec_dominant1_optsize_v2i32:
543 ; RV64ZVE32: # %bb.0:
544 ; RV64ZVE32-NEXT: lui a1, %hi(.LCPI39_0)
545 ; RV64ZVE32-NEXT: ld a1, %lo(.LCPI39_0)(a1)
546 ; RV64ZVE32-NEXT: li a2, -1
547 ; RV64ZVE32-NEXT: sd a2, 8(a0)
548 ; RV64ZVE32-NEXT: sd a1, 0(a0)
549 ; RV64ZVE32-NEXT: ret
550 store <2 x i64> <i64 2049638230412172402, i64 -1>, ptr %x
; NOTE(review): repeating byte sequences are re-typed as splats of a wider
; element: <1,2,...> repeated pairs become an e16 splat of 513 (0x0201);
; a 4-byte period becomes an e32 splat; an 8-byte period on RV64V becomes
; a vlse64 zero-stride broadcast, while RV32/RV64ZVE32 fall back to a
; full constant-pool vle8.
554 define void @buildvec_seq_v8i8_v4i16(ptr %x) {
555 ; CHECK-LABEL: buildvec_seq_v8i8_v4i16:
557 ; CHECK-NEXT: li a1, 513
558 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
559 ; CHECK-NEXT: vmv.v.x v8, a1
560 ; CHECK-NEXT: vse8.v v8, (a0)
562 store <8 x i8> <i8 1, i8 2, i8 1, i8 2, i8 1, i8 2, i8 undef, i8 2>, ptr %x
566 define void @buildvec_seq_v8i8_v2i32(ptr %x) {
567 ; RV32-LABEL: buildvec_seq_v8i8_v2i32:
569 ; RV32-NEXT: lui a1, 48
570 ; RV32-NEXT: addi a1, a1, 513
571 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
572 ; RV32-NEXT: vmv.v.x v8, a1
573 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
574 ; RV32-NEXT: vse8.v v8, (a0)
577 ; RV64V-LABEL: buildvec_seq_v8i8_v2i32:
579 ; RV64V-NEXT: lui a1, 48
580 ; RV64V-NEXT: addi a1, a1, 513
581 ; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
582 ; RV64V-NEXT: vmv.v.x v8, a1
583 ; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
584 ; RV64V-NEXT: vse8.v v8, (a0)
587 ; RV64ZVE32-LABEL: buildvec_seq_v8i8_v2i32:
588 ; RV64ZVE32: # %bb.0:
589 ; RV64ZVE32-NEXT: lui a1, 48
590 ; RV64ZVE32-NEXT: addi a1, a1, 513
591 ; RV64ZVE32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
592 ; RV64ZVE32-NEXT: vmv.v.x v8, a1
593 ; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
594 ; RV64ZVE32-NEXT: vse8.v v8, (a0)
595 ; RV64ZVE32-NEXT: ret
596 store <8 x i8> <i8 1, i8 2, i8 3, i8 undef, i8 1, i8 2, i8 3, i8 undef>, ptr %x
600 define void @buildvec_seq_v16i8_v2i64(ptr %x) {
601 ; RV32-LABEL: buildvec_seq_v16i8_v2i64:
603 ; RV32-NEXT: lui a1, %hi(.LCPI42_0)
604 ; RV32-NEXT: addi a1, a1, %lo(.LCPI42_0)
605 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
606 ; RV32-NEXT: vle8.v v8, (a1)
607 ; RV32-NEXT: vse8.v v8, (a0)
610 ; RV64V-LABEL: buildvec_seq_v16i8_v2i64:
612 ; RV64V-NEXT: lui a1, %hi(.LCPI42_0)
613 ; RV64V-NEXT: addi a1, a1, %lo(.LCPI42_0)
614 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
615 ; RV64V-NEXT: vlse64.v v8, (a1), zero
616 ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma
617 ; RV64V-NEXT: vse8.v v8, (a0)
620 ; RV64ZVE32-LABEL: buildvec_seq_v16i8_v2i64:
621 ; RV64ZVE32: # %bb.0:
622 ; RV64ZVE32-NEXT: lui a1, %hi(.LCPI42_0)
623 ; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI42_0)
624 ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
625 ; RV64ZVE32-NEXT: vle8.v v8, (a1)
626 ; RV64ZVE32-NEXT: vse8.v v8, (a0)
627 ; RV64ZVE32-NEXT: ret
628 store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr %x
; NOTE(review): the 8-byte period here sign-extends from a 32-bit
; immediate, so RV32/RV64V can splat it with vmv.v.x at e64 (note the
; RV64-only addiw); RV64ZVE32 lacks 64-bit elements and reloads the full
; pattern from the constant pool.
632 define void @buildvec_seq2_v16i8_v2i64(ptr %x) {
633 ; RV32-LABEL: buildvec_seq2_v16i8_v2i64:
635 ; RV32-NEXT: lui a1, 528432
636 ; RV32-NEXT: addi a1, a1, 513
637 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
638 ; RV32-NEXT: vmv.v.x v8, a1
639 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
640 ; RV32-NEXT: vse8.v v8, (a0)
643 ; RV64V-LABEL: buildvec_seq2_v16i8_v2i64:
645 ; RV64V-NEXT: lui a1, 528432
646 ; RV64V-NEXT: addiw a1, a1, 513
647 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
648 ; RV64V-NEXT: vmv.v.x v8, a1
649 ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma
650 ; RV64V-NEXT: vse8.v v8, (a0)
653 ; RV64ZVE32-LABEL: buildvec_seq2_v16i8_v2i64:
654 ; RV64ZVE32: # %bb.0:
655 ; RV64ZVE32-NEXT: lui a1, %hi(.LCPI43_0)
656 ; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI43_0)
657 ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
658 ; RV64ZVE32-NEXT: vle8.v v8, (a1)
659 ; RV64ZVE32-NEXT: vse8.v v8, (a0)
660 ; RV64ZVE32-NEXT: ret
661 store <16 x i8> <i8 1, i8 2, i8 3, i8 129, i8 -1, i8 -1, i8 -1, i8 -1, i8 1, i8 2, i8 3, i8 129, i8 -1, i8 -1, i8 -1, i8 -1>, ptr %x
; NOTE(review): non-power-of-two repeat <1,2,3> x3: built as a splat of 3
; with two masked vmerge.vim passes (mask constants 73 = 0b001001001 for
; the 1s, 146 = 0b010010010 for the 2s), then stored with VL=9.
665 define void @buildvec_seq_v9i8(ptr %x) {
666 ; CHECK-LABEL: buildvec_seq_v9i8:
668 ; CHECK-NEXT: li a1, 73
669 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
670 ; CHECK-NEXT: vmv.s.x v0, a1
671 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
672 ; CHECK-NEXT: vmv.v.i v8, 3
673 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
674 ; CHECK-NEXT: li a1, 146
675 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
676 ; CHECK-NEXT: vmv.s.x v0, a1
677 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
678 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
679 ; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma
680 ; CHECK-NEXT: vse8.v v8, (a0)
682 store <9 x i8> <i8 1, i8 2, i8 3, i8 1, i8 2, i8 3, i8 1, i8 2, i8 3>, ptr %x
; NOTE(review): the repeating i16 pair <-127,-1> packs into the 32-bit
; value 0xFFFFFF81, which sign-extends from -127, so a single
; vmv.v.x splat at e32 suffices.
686 define void @buildvec_seq_v4i16_v2i32(ptr %x) {
687 ; CHECK-LABEL: buildvec_seq_v4i16_v2i32:
689 ; CHECK-NEXT: li a1, -127
690 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
691 ; CHECK-NEXT: vmv.v.x v8, a1
692 ; CHECK-NEXT: vse16.v v8, (a0)
694 store <4 x i16> <i16 -127, i16 -1, i16 -127, i16 -1>, ptr %x
; NOTE(review): half-step sequence <0,0,1,1> = vid >> 1 (vsrl.vi). The
; first five stores reuse that vector; the last two undef patterns are
; not recognized as vid/2 and are built with slideup/slide1down instead
; (see the inline comments by the stores). RV32/RV64 differ only in
; instruction scheduling.
698 define void @buildvec_vid_step1o2_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3, ptr %z4, ptr %z5, ptr %z6) {
699 ; RV32-LABEL: buildvec_vid_step1o2_v4i32:
701 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
702 ; RV32-NEXT: vid.v v8
703 ; RV32-NEXT: vsrl.vi v8, v8, 1
704 ; RV32-NEXT: vse32.v v8, (a0)
705 ; RV32-NEXT: vse32.v v8, (a1)
706 ; RV32-NEXT: vmv.v.i v9, 1
707 ; RV32-NEXT: vse32.v v8, (a2)
708 ; RV32-NEXT: vse32.v v8, (a3)
709 ; RV32-NEXT: vse32.v v8, (a4)
710 ; RV32-NEXT: vmv.s.x v8, zero
711 ; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma
712 ; RV32-NEXT: vslideup.vi v9, v8, 1
713 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
714 ; RV32-NEXT: vse32.v v9, (a5)
715 ; RV32-NEXT: vmv.v.i v8, 0
716 ; RV32-NEXT: li a0, 1
717 ; RV32-NEXT: vslide1down.vx v8, v8, a0
718 ; RV32-NEXT: vse32.v v8, (a6)
721 ; RV64-LABEL: buildvec_vid_step1o2_v4i32:
723 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
724 ; RV64-NEXT: vid.v v8
725 ; RV64-NEXT: vsrl.vi v8, v8, 1
726 ; RV64-NEXT: vse32.v v8, (a0)
727 ; RV64-NEXT: vmv.v.i v9, 1
728 ; RV64-NEXT: vse32.v v8, (a1)
729 ; RV64-NEXT: vse32.v v8, (a2)
730 ; RV64-NEXT: vse32.v v8, (a3)
731 ; RV64-NEXT: vse32.v v8, (a4)
732 ; RV64-NEXT: vmv.s.x v8, zero
733 ; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, ma
734 ; RV64-NEXT: vslideup.vi v9, v8, 1
735 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
736 ; RV64-NEXT: vse32.v v9, (a5)
737 ; RV64-NEXT: vmv.v.i v8, 0
738 ; RV64-NEXT: li a0, 1
739 ; RV64-NEXT: vslide1down.vx v8, v8, a0
740 ; RV64-NEXT: vse32.v v8, (a6)
742 store <4 x i32> <i32 0, i32 0, i32 1, i32 1>, ptr %z0
743 store <4 x i32> <i32 0, i32 0, i32 1, i32 undef>, ptr %z1
744 store <4 x i32> <i32 0, i32 undef, i32 1, i32 1>, ptr %z2
745 store <4 x i32> <i32 undef, i32 0, i32 undef, i32 1>, ptr %z3
746 store <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>, ptr %z4
747 ; We don't catch this one
748 store <4 x i32> <i32 undef, i32 0, i32 1, i32 1>, ptr %z5
749 ; We catch this one but as VID/3 rather than VID/2
750 store <4 x i32> <i32 0, i32 0, i32 undef, i32 1>, ptr %z6
; NOTE(review): same half-step pattern with a +3 offset: vid >> 1 then
; vadd.vi 3; the two unmatched undef patterns at the end are assembled
; with slideup/slide1down, mirroring the v4i32 test above.
754 define void @buildvec_vid_step1o2_add3_v4i16(ptr %z0, ptr %z1, ptr %z2, ptr %z3, ptr %z4, ptr %z5, ptr %z6) {
755 ; CHECK-LABEL: buildvec_vid_step1o2_add3_v4i16:
757 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
758 ; CHECK-NEXT: vid.v v8
759 ; CHECK-NEXT: vsrl.vi v8, v8, 1
760 ; CHECK-NEXT: vadd.vi v8, v8, 3
761 ; CHECK-NEXT: vse16.v v8, (a0)
762 ; CHECK-NEXT: vmv.v.i v9, 3
763 ; CHECK-NEXT: vse16.v v8, (a1)
764 ; CHECK-NEXT: vse16.v v8, (a2)
765 ; CHECK-NEXT: vse16.v v8, (a3)
766 ; CHECK-NEXT: vse16.v v8, (a4)
767 ; CHECK-NEXT: vmv.v.i v8, 4
768 ; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
769 ; CHECK-NEXT: vslideup.vi v8, v9, 1
770 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
771 ; CHECK-NEXT: vse16.v v8, (a5)
772 ; CHECK-NEXT: li a0, 4
773 ; CHECK-NEXT: vslide1down.vx v8, v9, a0
774 ; CHECK-NEXT: vse16.v v8, (a6)
776 store <4 x i16> <i16 3, i16 3, i16 4, i16 4>, ptr %z0
777 store <4 x i16> <i16 3, i16 3, i16 4, i16 undef>, ptr %z1
778 store <4 x i16> <i16 3, i16 undef, i16 4, i16 4>, ptr %z2
779 store <4 x i16> <i16 undef, i16 3, i16 undef, i16 4>, ptr %z3
780 store <4 x i16> <i16 3, i16 undef, i16 4, i16 undef>, ptr %z4
781 ; We don't catch this one
782 store <4 x i16> <i16 undef, i16 3, i16 4, i16 4>, ptr %z5
783 ; We catch this one but as VID/3 rather than VID/2
784 store <4 x i16> <i16 3, i16 3, i16 undef, i16 4>, ptr %z6
; NOTE(review): quarter-step negative sequence: -5 - (vid >> 2), emitted
; as vsrl.vi 2 followed by vrsub.vi -5.
788 define void @buildvec_vid_stepn1o4_addn5_v8i8(ptr %z0) {
789 ; CHECK-LABEL: buildvec_vid_stepn1o4_addn5_v8i8:
791 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
792 ; CHECK-NEXT: vid.v v8
793 ; CHECK-NEXT: vsrl.vi v8, v8, 2
794 ; CHECK-NEXT: vrsub.vi v8, v8, -5
795 ; CHECK-NEXT: vse8.v v8, (a0)
797 store <8 x i8> <i8 -5, i8 -5, i8 -5, i8 -5, i8 -6, i8 -6, i8 -6, i8 -6>, ptr %z0
; NOTE(review): stride 17 needs a vmul.vx; a power-of-two stride (512)
; is strength-reduced to vsll.vi instead.
801 define void @buildvec_vid_mpy_imm_v8i16(ptr %x) {
802 ; CHECK-LABEL: buildvec_vid_mpy_imm_v8i16:
804 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
805 ; CHECK-NEXT: vid.v v8
806 ; CHECK-NEXT: li a1, 17
807 ; CHECK-NEXT: vmul.vx v8, v8, a1
808 ; CHECK-NEXT: vse16.v v8, (a0)
810 store <8 x i16> <i16 0, i16 17, i16 34, i16 51, i16 68, i16 85, i16 102, i16 119>, ptr %x
814 define void @buildvec_vid_shl_imm_v8i16(ptr %x) {
815 ; CHECK-LABEL: buildvec_vid_shl_imm_v8i16:
817 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
818 ; CHECK-NEXT: vid.v v8
819 ; CHECK-NEXT: vsll.vi v8, v8, 9
820 ; CHECK-NEXT: vse16.v v8, (a0)
822 store <8 x i16> <i16 0, i16 512, i16 1024, i16 1536, i16 2048, i16 2560, i16 3072, i16 3584>, ptr %x
; NOTE(review): splatting one extracted lane: a constant index uses
; vrgather.vi, a runtime index uses vrgather.vx; both need a separate
; destination (v9) followed by vmv.v.v back into v8.
826 define <4 x i32> @splat_c3_v4i32(<4 x i32> %v) {
827 ; CHECK-LABEL: splat_c3_v4i32:
829 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
830 ; CHECK-NEXT: vrgather.vi v9, v8, 3
831 ; CHECK-NEXT: vmv.v.v v8, v9
833 %x = extractelement <4 x i32> %v, i32 3
834 %ins = insertelement <4 x i32> poison, i32 %x, i32 0
835 %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
839 define <4 x i32> @splat_idx_v4i32(<4 x i32> %v, i64 %idx) {
840 ; CHECK-LABEL: splat_idx_v4i32:
842 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
843 ; CHECK-NEXT: vrgather.vx v9, v8, a0
844 ; CHECK-NEXT: vmv.v.v v8, v9
846 %x = extractelement <4 x i32> %v, i64 %idx
847 %ins = insertelement <4 x i32> poison, i32 %x, i32 0
848 %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
852 define <8 x i16> @splat_c4_v8i16(<8 x i16> %v) {
853 ; CHECK-LABEL: splat_c4_v8i16:
855 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
856 ; CHECK-NEXT: vrgather.vi v9, v8, 4
857 ; CHECK-NEXT: vmv.v.v v8, v9
859 %x = extractelement <8 x i16> %v, i32 4
860 %ins = insertelement <8 x i16> poison, i16 %x, i32 0
861 %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
865 define <8 x i16> @splat_idx_v8i16(<8 x i16> %v, i64 %idx) {
866 ; CHECK-LABEL: splat_idx_v8i16:
868 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
869 ; CHECK-NEXT: vrgather.vx v9, v8, a0
870 ; CHECK-NEXT: vmv.v.v v8, v9
872 %x = extractelement <8 x i16> %v, i64 %idx
873 %ins = insertelement <8 x i16> poison, i16 %x, i32 0
874 %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
; NOTE(review): more negative tests — near-miss vid sequences that must
; NOT be matched; they are materialized as packed scalars (vmv.s.x) or,
; for the sparse v16i8 case, as a slideup of a splat-3 into a zero vector.
878 define <4 x i8> @buildvec_not_vid_v4i8_1() {
879 ; CHECK-LABEL: buildvec_not_vid_v4i8_1:
881 ; CHECK-NEXT: lui a0, 12320
882 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
883 ; CHECK-NEXT: vmv.s.x v8, a0
885 ret <4 x i8> <i8 0, i8 0, i8 2, i8 3>
888 define <4 x i8> @buildvec_not_vid_v4i8_2() {
889 ; CHECK-LABEL: buildvec_not_vid_v4i8_2:
891 ; CHECK-NEXT: lui a0, 16
892 ; CHECK-NEXT: addi a0, a0, 771
893 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
894 ; CHECK-NEXT: vmv.s.x v8, a0
896 ret <4 x i8> <i8 3, i8 3, i8 1, i8 0>
899 ; We match this as a VID sequence (-3 / 8) + 5 but choose not to introduce
900 ; division to compute it.
901 define <16 x i8> @buildvec_not_vid_v16i8() {
902 ; CHECK-LABEL: buildvec_not_vid_v16i8:
904 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
905 ; CHECK-NEXT: vmv.v.i v9, 3
906 ; CHECK-NEXT: vmv.v.i v8, 0
907 ; CHECK-NEXT: vsetivli zero, 7, e8, m1, tu, ma
908 ; CHECK-NEXT: vslideup.vi v8, v9, 6
910 ret <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 3, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0>
; NOTE(review): overwriting a contiguous prefix/suffix of an existing
; vector: each scalar goes through vmv.s.x and is placed with
; vslideup.vi under an increasing-VL tail-undisturbed (tu) vsetivli so
; the untouched lanes of %vin survive.
913 define <8 x i32> @prefix_overwrite(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
914 ; CHECK-LABEL: prefix_overwrite:
916 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
917 ; CHECK-NEXT: vmv.s.x v8, a0
918 ; CHECK-NEXT: vmv.s.x v10, a1
919 ; CHECK-NEXT: vslideup.vi v8, v10, 1
920 ; CHECK-NEXT: vmv.s.x v10, a2
921 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
922 ; CHECK-NEXT: vslideup.vi v8, v10, 2
923 ; CHECK-NEXT: vmv.s.x v10, a3
924 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
925 ; CHECK-NEXT: vslideup.vi v8, v10, 3
927 %v0 = insertelement <8 x i32> %vin, i32 %a, i32 0
928 %v1 = insertelement <8 x i32> %v0, i32 %b, i32 1
929 %v2 = insertelement <8 x i32> %v1, i32 %c, i32 2
930 %v3 = insertelement <8 x i32> %v2, i32 %d, i32 3
934 define <8 x i32> @suffix_overwrite(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
935 ; CHECK-LABEL: suffix_overwrite:
937 ; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
938 ; CHECK-NEXT: vmv.s.x v10, a0
939 ; CHECK-NEXT: vslideup.vi v8, v10, 4
940 ; CHECK-NEXT: vmv.s.x v10, a1
941 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
942 ; CHECK-NEXT: vslideup.vi v8, v10, 5
943 ; CHECK-NEXT: vmv.s.x v10, a2
944 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
945 ; CHECK-NEXT: vslideup.vi v8, v10, 6
946 ; CHECK-NEXT: vmv.s.x v10, a3
947 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
948 ; CHECK-NEXT: vslideup.vi v8, v10, 7
950 %v0 = insertelement <8 x i32> %vin, i32 %a, i32 4
951 %v1 = insertelement <8 x i32> %v0, i32 %b, i32 5
952 %v2 = insertelement <8 x i32> %v1, i32 %c, i32 6
953 %v3 = insertelement <8 x i32> %v2, i32 %d, i32 7
; NOTE(review): vscale_range(2,2) pins VLEN to 128, so each result
; register holds exactly two i64 (or four i32) lanes and the build can be
; done per-register with vmv.v.x + vslide1down chains; RV64ZVE32 returns
; via scalar stores through a0.
957 define <4 x i64> @v4xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) {
958 ; RV32-LABEL: v4xi64_exact:
960 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
961 ; RV32-NEXT: vmv.v.x v8, a4
962 ; RV32-NEXT: vslide1down.vx v8, v8, a5
963 ; RV32-NEXT: vslide1down.vx v8, v8, a6
964 ; RV32-NEXT: vslide1down.vx v9, v8, a7
965 ; RV32-NEXT: vmv.v.x v8, a0
966 ; RV32-NEXT: vslide1down.vx v8, v8, a1
967 ; RV32-NEXT: vslide1down.vx v8, v8, a2
968 ; RV32-NEXT: vslide1down.vx v8, v8, a3
971 ; RV64V-LABEL: v4xi64_exact:
973 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
974 ; RV64V-NEXT: vmv.v.x v8, a2
975 ; RV64V-NEXT: vslide1down.vx v9, v8, a3
976 ; RV64V-NEXT: vmv.v.x v8, a0
977 ; RV64V-NEXT: vslide1down.vx v8, v8, a1
980 ; RV64ZVE32-LABEL: v4xi64_exact:
981 ; RV64ZVE32: # %bb.0:
982 ; RV64ZVE32-NEXT: sd a4, 24(a0)
983 ; RV64ZVE32-NEXT: sd a3, 16(a0)
984 ; RV64ZVE32-NEXT: sd a2, 8(a0)
985 ; RV64ZVE32-NEXT: sd a1, 0(a0)
986 ; RV64ZVE32-NEXT: ret
987 %v1 = insertelement <4 x i64> poison, i64 %a, i32 0
988 %v2 = insertelement <4 x i64> %v1, i64 %b, i32 1
989 %v3 = insertelement <4 x i64> %v2, i64 %c, i32 2
990 %v4 = insertelement <4 x i64> %v3, i64 %d, i32 3
; NOTE(review): eight i64 args at exact VLEN=128 fill four registers
; (v8-v11). On RV32 the last four 64-bit values arrive on the stack, so
; lw loads plus zero-stride vlse32 broadcasts seed v10/v11 before the
; slide1down chains; RV64ZVE32 again stores scalars through a0.
994 define <8 x i64> @v8xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h) vscale_range(2,2) {
995 ; RV32-LABEL: v8xi64_exact:
997 ; RV32-NEXT: lw t0, 28(sp)
998 ; RV32-NEXT: lw t1, 24(sp)
999 ; RV32-NEXT: lw t2, 20(sp)
1000 ; RV32-NEXT: lw t3, 12(sp)
1001 ; RV32-NEXT: lw t4, 8(sp)
1002 ; RV32-NEXT: lw t5, 4(sp)
1003 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1004 ; RV32-NEXT: vmv.v.x v8, a4
1005 ; RV32-NEXT: vslide1down.vx v8, v8, a5
1006 ; RV32-NEXT: vslide1down.vx v8, v8, a6
1007 ; RV32-NEXT: vslide1down.vx v9, v8, a7
1008 ; RV32-NEXT: vmv.v.x v8, a0
1009 ; RV32-NEXT: vslide1down.vx v8, v8, a1
1010 ; RV32-NEXT: vslide1down.vx v8, v8, a2
1011 ; RV32-NEXT: vlse32.v v10, (sp), zero
1012 ; RV32-NEXT: vslide1down.vx v8, v8, a3
1013 ; RV32-NEXT: addi a0, sp, 16
1014 ; RV32-NEXT: vlse32.v v11, (a0), zero
1015 ; RV32-NEXT: vslide1down.vx v10, v10, t5
1016 ; RV32-NEXT: vslide1down.vx v10, v10, t4
1017 ; RV32-NEXT: vslide1down.vx v10, v10, t3
1018 ; RV32-NEXT: vslide1down.vx v11, v11, t2
1019 ; RV32-NEXT: vslide1down.vx v11, v11, t1
1020 ; RV32-NEXT: vslide1down.vx v11, v11, t0
1023 ; RV64V-LABEL: v8xi64_exact:
1025 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1026 ; RV64V-NEXT: vmv.v.x v8, a2
1027 ; RV64V-NEXT: vslide1down.vx v9, v8, a3
1028 ; RV64V-NEXT: vmv.v.x v8, a0
1029 ; RV64V-NEXT: vslide1down.vx v8, v8, a1
1030 ; RV64V-NEXT: vmv.v.x v10, a4
1031 ; RV64V-NEXT: vslide1down.vx v10, v10, a5
1032 ; RV64V-NEXT: vmv.v.x v11, a6
1033 ; RV64V-NEXT: vslide1down.vx v11, v11, a7
1036 ; RV64ZVE32-LABEL: v8xi64_exact:
1037 ; RV64ZVE32: # %bb.0:
1038 ; RV64ZVE32-NEXT: ld t0, 0(sp)
1039 ; RV64ZVE32-NEXT: sd t0, 56(a0)
1040 ; RV64ZVE32-NEXT: sd a7, 48(a0)
1041 ; RV64ZVE32-NEXT: sd a6, 40(a0)
1042 ; RV64ZVE32-NEXT: sd a5, 32(a0)
1043 ; RV64ZVE32-NEXT: sd a4, 24(a0)
1044 ; RV64ZVE32-NEXT: sd a3, 16(a0)
1045 ; RV64ZVE32-NEXT: sd a2, 8(a0)
1046 ; RV64ZVE32-NEXT: sd a1, 0(a0)
1047 ; RV64ZVE32-NEXT: ret
1048 %v1 = insertelement <8 x i64> poison, i64 %a, i32 0
1049 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
1050 %v3 = insertelement <8 x i64> %v2, i64 %c, i32 2
1051 %v4 = insertelement <8 x i64> %v3, i64 %d, i32 3
1052 %v5 = insertelement <8 x i64> %v4, i64 %e, i32 4
1053 %v6 = insertelement <8 x i64> %v5, i64 %f, i32 5
1054 %v7 = insertelement <8 x i64> %v6, i64 %g, i32 6
1055 %v8 = insertelement <8 x i64> %v7, i64 %h, i32 7
; Like v8xi64_exact, but elements 4..7 repeat the same four scalars as
; elements 0..3. The RV32 and RV64V CHECK lines show the low half being
; built once and duplicated into the high-half registers with vmv.v.v.
; NOTE(review): CHECK lines are autogenerated by update_llc_test_checks.py;
; regenerate them rather than hand-editing.
1059 define <8 x i64> @v8xi64_exact_equal_halves(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) {
1060 ; RV32-LABEL: v8xi64_exact_equal_halves:
1062 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1063 ; RV32-NEXT: vmv.v.x v8, a4
1064 ; RV32-NEXT: vslide1down.vx v8, v8, a5
1065 ; RV32-NEXT: vslide1down.vx v8, v8, a6
1066 ; RV32-NEXT: vslide1down.vx v9, v8, a7
1067 ; RV32-NEXT: vmv.v.x v8, a0
1068 ; RV32-NEXT: vslide1down.vx v8, v8, a1
1069 ; RV32-NEXT: vslide1down.vx v8, v8, a2
1070 ; RV32-NEXT: vslide1down.vx v8, v8, a3
1071 ; RV32-NEXT: vmv.v.v v10, v8
1072 ; RV32-NEXT: vmv.v.v v11, v9
1075 ; RV64V-LABEL: v8xi64_exact_equal_halves:
1077 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1078 ; RV64V-NEXT: vmv.v.x v8, a2
1079 ; RV64V-NEXT: vslide1down.vx v9, v8, a3
1080 ; RV64V-NEXT: vmv.v.x v8, a0
1081 ; RV64V-NEXT: vslide1down.vx v8, v8, a1
1082 ; RV64V-NEXT: vmv.v.v v10, v8
1083 ; RV64V-NEXT: vmv.v.v v11, v9
1086 ; RV64ZVE32-LABEL: v8xi64_exact_equal_halves:
1087 ; RV64ZVE32: # %bb.0:
1088 ; RV64ZVE32-NEXT: sd a4, 56(a0)
1089 ; RV64ZVE32-NEXT: sd a3, 48(a0)
1090 ; RV64ZVE32-NEXT: sd a2, 40(a0)
1091 ; RV64ZVE32-NEXT: sd a1, 32(a0)
1092 ; RV64ZVE32-NEXT: sd a4, 24(a0)
1093 ; RV64ZVE32-NEXT: sd a3, 16(a0)
1094 ; RV64ZVE32-NEXT: sd a2, 8(a0)
1095 ; RV64ZVE32-NEXT: sd a1, 0(a0)
1096 ; RV64ZVE32-NEXT: ret
1097 %v1 = insertelement <8 x i64> poison, i64 %a, i32 0
1098 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
1099 %v3 = insertelement <8 x i64> %v2, i64 %c, i32 2
1100 %v4 = insertelement <8 x i64> %v3, i64 %d, i32 3
1101 %v5 = insertelement <8 x i64> %v4, i64 %a, i32 4
1102 %v6 = insertelement <8 x i64> %v5, i64 %b, i32 5
1103 %v7 = insertelement <8 x i64> %v6, i64 %c, i32 6
1104 %v8 = insertelement <8 x i64> %v7, i64 %d, i32 7
; Only elements 0..3 of the <8 x i64> are written; elements 4..7 are left
; poison, so the CHECK lines show only the low half being materialized
; (RV64ZVE32 stores just the four defined elements).
; NOTE(review): CHECK lines are autogenerated by update_llc_test_checks.py;
; regenerate them rather than hand-editing.
1108 define <8 x i64> @v8xi64_exact_undef_suffix(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) {
1109 ; RV32-LABEL: v8xi64_exact_undef_suffix:
1111 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1112 ; RV32-NEXT: vmv.v.x v8, a4
1113 ; RV32-NEXT: vslide1down.vx v8, v8, a5
1114 ; RV32-NEXT: vslide1down.vx v8, v8, a6
1115 ; RV32-NEXT: vslide1down.vx v9, v8, a7
1116 ; RV32-NEXT: vmv.v.x v8, a0
1117 ; RV32-NEXT: vslide1down.vx v8, v8, a1
1118 ; RV32-NEXT: vslide1down.vx v8, v8, a2
1119 ; RV32-NEXT: vslide1down.vx v8, v8, a3
1122 ; RV64V-LABEL: v8xi64_exact_undef_suffix:
1124 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1125 ; RV64V-NEXT: vmv.v.x v8, a2
1126 ; RV64V-NEXT: vslide1down.vx v9, v8, a3
1127 ; RV64V-NEXT: vmv.v.x v8, a0
1128 ; RV64V-NEXT: vslide1down.vx v8, v8, a1
1131 ; RV64ZVE32-LABEL: v8xi64_exact_undef_suffix:
1132 ; RV64ZVE32: # %bb.0:
1133 ; RV64ZVE32-NEXT: sd a4, 24(a0)
1134 ; RV64ZVE32-NEXT: sd a3, 16(a0)
1135 ; RV64ZVE32-NEXT: sd a2, 8(a0)
1136 ; RV64ZVE32-NEXT: sd a1, 0(a0)
1137 ; RV64ZVE32-NEXT: ret
1138 %v1 = insertelement <8 x i64> poison, i64 %a, i32 0
1139 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
1140 %v3 = insertelement <8 x i64> %v2, i64 %c, i32 2
1141 %v4 = insertelement <8 x i64> %v3, i64 %d, i32 3
1145 define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) {
1146 ; RV32-LABEL: v8xi64_exact_undef_prefix:
1148 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1149 ; RV32-NEXT: vmv.v.x v8, a4
1150 ; RV32-NEXT: vslide1down.vx v8, v8, a5
1151 ; RV32-NEXT: vslide1down.vx v8, v8, a6
1152 ; RV32-NEXT: vslide1down.vx v11, v8, a7
1153 ; RV32-NEXT: vmv.v.x v8, a0
1154 ; RV32-NEXT: vslide1down.vx v8, v8, a1
1155 ; RV32-NEXT: vslide1down.vx v8, v8, a2
1156 ; RV32-NEXT: vslide1down.vx v10, v8, a3
1159 ; RV64V-LABEL: v8xi64_exact_undef_prefix:
1161 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1162 ; RV64V-NEXT: vmv.v.x v8, a2
1163 ; RV64V-NEXT: vslide1down.vx v11, v8, a3
1164 ; RV64V-NEXT: vmv.v.x v8, a0
1165 ; RV64V-NEXT: vslide1down.vx v10, v8, a1
1168 ; RV64ZVE32-LABEL: v8xi64_exact_undef_prefix:
1169 ; RV64ZVE32: # %bb.0:
1170 ; RV64ZVE32-NEXT: sd a4, 56(a0)
1171 ; RV64ZVE32-NEXT: sd a3, 48(a0)
1172 ; RV64ZVE32-NEXT: sd a2, 40(a0)
1173 ; RV64ZVE32-NEXT: sd a1, 32(a0)
1174 ; RV64ZVE32-NEXT: ret
1175 %v1 = insertelement <8 x i64> poison, i64 %a, i32 4
1176 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 5
1177 %v3 = insertelement <8 x i64> %v2, i64 %c, i32 6
1178 %v4 = insertelement <8 x i64> %v3, i64 %d, i32 7