; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

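; Output that is identical on both targets is checked under the shared CHECK
; prefix; sequences that differ between riscv32 and riscv64 use the RV32/RV64
; prefixes from the RUN lines above.
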
define <4 x i32> @insertelt_v4i32_0(<4 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v4i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
  %b = insertelement <4 x i32> %a, i32 %y, i32 0
  ret <4 x i32> %b
}

define <4 x i32> @insertelt_v4i32_3(<4 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v4i32_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vslideup.vi v8, v9, 3
; CHECK-NEXT:    ret
  %b = insertelement <4 x i32> %a, i32 %y, i32 3
  ret <4 x i32> %b
}

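; For a variable index the scalar is first placed in element 0 of a temporary
; with vmv.s.x and then slid into place with vslideup.vx, using VL = idx + 1
; and a tail-undisturbed policy so only the target lane changes.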
define <4 x i32> @insertelt_v4i32_idx(<4 x i32> %a, i32 %y, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_v4i32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a2, a1, 1
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, tu, ma
; CHECK-NEXT:    vslideup.vx v8, v9, a1
; CHECK-NEXT:    ret
  %b = insertelement <4 x i32> %a, i32 %y, i32 %idx
  ret <4 x i32> %b
}

define <32 x i32> @insertelt_v32i32_0(<32 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v32i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
  %b = insertelement <32 x i32> %a, i32 %y, i32 0
  ret <32 x i32> %b
}

; FIXME: Should only require an m2 slideup
define <32 x i32> @insertelt_v32i32_4(<32 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v32i32_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT:    vmv.s.x v16, a0
; CHECK-NEXT:    vslideup.vi v8, v16, 4
; CHECK-NEXT:    ret
  %b = insertelement <32 x i32> %a, i32 %y, i32 4
  ret <32 x i32> %b
}

define <32 x i32> @insertelt_v32i32_31(<32 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v32i32_31:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vmv.s.x v16, a0
; CHECK-NEXT:    vslideup.vi v8, v16, 31
; CHECK-NEXT:    ret
  %b = insertelement <32 x i32> %a, i32 %y, i32 31
  ret <32 x i32> %b
}

define <32 x i32> @insertelt_v32i32_idx(<32 x i32> %a, i32 %y, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_v32i32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v16, a0
; CHECK-NEXT:    addi a0, a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, ma
; CHECK-NEXT:    vslideup.vx v8, v16, a1
; CHECK-NEXT:    ret
  %b = insertelement <32 x i32> %a, i32 %y, i32 %idx
  ret <32 x i32> %b
}

define <64 x i32> @insertelt_v64i32_0(<64 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v64i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
  %b = insertelement <64 x i32> %a, i32 %y, i32 0
  ret <64 x i32> %b
}

define <64 x i32> @insertelt_v64i32_63(<64 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v64i32_63:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vmv.s.x v24, a0
; CHECK-NEXT:    vslideup.vi v16, v24, 31
; CHECK-NEXT:    ret
  %b = insertelement <64 x i32> %a, i32 %y, i32 63
  ret <64 x i32> %b
}

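; <64 x i32> is split across two register groups (v8 and v16), so a variable
; index can land in either half; the lowering below spills both halves to a
; stack temporary, stores the scalar at the element's address, and reloads.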
define <64 x i32> @insertelt_v64i32_idx(<64 x i32> %a, i32 %y, i32 zeroext %idx) {
; RV32-LABEL: insertelt_v64i32_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -384
; RV32-NEXT:    .cfi_def_cfa_offset 384
; RV32-NEXT:    sw ra, 380(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 376(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    addi s0, sp, 384
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    andi a1, a1, 63
; RV32-NEXT:    slli a1, a1, 2
; RV32-NEXT:    mv a2, sp
; RV32-NEXT:    add a1, a2, a1
; RV32-NEXT:    addi a3, sp, 128
; RV32-NEXT:    li a4, 32
; RV32-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
; RV32-NEXT:    vse32.v v16, (a3)
; RV32-NEXT:    vse32.v v8, (a2)
; RV32-NEXT:    sw a0, 0(a1)
; RV32-NEXT:    vle32.v v8, (a2)
; RV32-NEXT:    vle32.v v16, (a3)
; RV32-NEXT:    addi sp, s0, -384
; RV32-NEXT:    lw ra, 380(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 376(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 384
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v64i32_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -384
; RV64-NEXT:    .cfi_def_cfa_offset 384
; RV64-NEXT:    sd ra, 376(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 368(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 384
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    andi a1, a1, 63
; RV64-NEXT:    slli a1, a1, 2
; RV64-NEXT:    mv a2, sp
; RV64-NEXT:    add a1, a2, a1
; RV64-NEXT:    addi a3, sp, 128
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
; RV64-NEXT:    vse32.v v16, (a3)
; RV64-NEXT:    vse32.v v8, (a2)
; RV64-NEXT:    sw a0, 0(a1)
; RV64-NEXT:    vle32.v v8, (a2)
; RV64-NEXT:    vle32.v v16, (a3)
; RV64-NEXT:    addi sp, s0, -384
; RV64-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 384
; RV64-NEXT:    ret
  %b = insertelement <64 x i32> %a, i32 %y, i32 %idx
  ret <64 x i32> %b
}

; FIXME: This codegen needs to be improved. These tests previously asserted
; while type legalizing the i64 type on RV32.

define <4 x i64> @insertelt_v4i64(<4 x i64> %a, i64 %y) {
; RV32-LABEL: insertelt_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
; RV32-NEXT:    vslide1down.vx v10, v8, a0
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v8, v10, 3
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vmv.s.x v10, a0
; RV64-NEXT:    vslideup.vi v8, v10, 3
; RV64-NEXT:    ret
  %b = insertelement <4 x i64> %a, i64 %y, i32 3
  ret <4 x i64> %b
}

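; In the *_store variants the vector is loaded from and stored back to memory,
; so the insert folds into plain scalar stores at the element's offset (a pair
; of sw on RV32 and a single sd on RV64 for i64 elements).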
define void @insertelt_v4i64_store(ptr %x, i64 %y) {
; RV32-LABEL: insertelt_v4i64_store:
; RV32:       # %bb.0:
; RV32-NEXT:    sw a2, 28(a0)
; RV32-NEXT:    sw a1, 24(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v4i64_store:
; RV64:       # %bb.0:
; RV64-NEXT:    sd a1, 24(a0)
; RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = insertelement <4 x i64> %a, i64 %y, i32 3
  store <4 x i64> %b, ptr %x
  ret void
}

; This uses a non-power-of-2 type so that it isn't an MVT.
; The align keeps the type legalizer from using a 256-bit load, so we must
; split it. This exercises some operations that weren't supported for scalable
; vectors when this test was written.
define <3 x i64> @insertelt_v3i64(<3 x i64> %a, i64 %y) {
; RV32-LABEL: insertelt_v3i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v9, v8, 3
; RV32-NEXT:    vmv.x.s a2, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 2
; RV32-NEXT:    vmv.x.s a3, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 1
; RV32-NEXT:    vmv.x.s a4, v9
; RV32-NEXT:    vmv.x.s a5, v8
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v8, a5
; RV32-NEXT:    vslide1down.vx v8, v8, a4
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslidedown.vi v8, v8, 2
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v3i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v9, v8, 1
; RV64-NEXT:    vmv.x.s a1, v9
; RV64-NEXT:    vmv.x.s a2, v8
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vmv.v.x v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    vslidedown.vi v8, v8, 1
; RV64-NEXT:    ret
  %b = insertelement <3 x i64> %a, i64 %y, i32 2
  ret <3 x i64> %b
}

define void @insertelt_v3i64_store(ptr %x, i64 %y) {
; RV32-LABEL: insertelt_v3i64_store:
; RV32:       # %bb.0:
; RV32-NEXT:    sw a2, 20(a0)
; RV32-NEXT:    sw a1, 16(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v3i64_store:
; RV64:       # %bb.0:
; RV64-NEXT:    sd a1, 16(a0)
; RV64-NEXT:    ret
  %a = load <3 x i64>, ptr %x, align 8
  %b = insertelement <3 x i64> %a, i64 %y, i32 2
  store <3 x i64> %b, ptr %x
  ret void
}

define <16 x i8> @insertelt_v16i8(<16 x i8> %a, i8 %y) {
; CHECK-LABEL: insertelt_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 15, e8, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vslideup.vi v8, v9, 14
; CHECK-NEXT:    ret
  %b = insertelement <16 x i8> %a, i8 %y, i32 14
  ret <16 x i8> %b
}

define void @insertelt_v16i8_store(ptr %x, i8 %y) {
; CHECK-LABEL: insertelt_v16i8_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    sb a1, 14(a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> %a, i8 %y, i32 14
  store <16 x i8> %b, ptr %x
  ret void
}

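; %idx is not marked zeroext here, so RV64 has to clear the upper 32 bits of
; the index (slli/srli pair) before using it to form the VL, while RV32 can
; use it directly.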
define <32 x i16> @insertelt_v32i16(<32 x i16> %a, i16 %y, i32 %idx) {
; RV32-LABEL: insertelt_v32i16:
; RV32:       # %bb.0:
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
; RV32-NEXT:    vmv.s.x v12, a0
; RV32-NEXT:    addi a0, a1, 1
; RV32-NEXT:    vsetvli zero, a0, e16, m4, tu, ma
; RV32-NEXT:    vslideup.vx v8, v12, a1
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v32i16:
; RV64:       # %bb.0:
; RV64-NEXT:    li a2, 32
; RV64-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
; RV64-NEXT:    vmv.s.x v12, a0
; RV64-NEXT:    slli a1, a1, 32
; RV64-NEXT:    srli a1, a1, 32
; RV64-NEXT:    addi a0, a1, 1
; RV64-NEXT:    vsetvli zero, a0, e16, m4, tu, ma
; RV64-NEXT:    vslideup.vx v8, v12, a1
; RV64-NEXT:    ret
  %b = insertelement <32 x i16> %a, i16 %y, i32 %idx
  ret <32 x i16> %b
}

define void @insertelt_v32i16_store(ptr %x, i16 %y, i32 %idx) {
; CHECK-LABEL: insertelt_v32i16_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andi a2, a2, 31
; CHECK-NEXT:    slli a2, a2, 1
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    sh a1, 0(a0)
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = insertelement <32 x i16> %a, i16 %y, i32 %idx
  store <32 x i16> %b, ptr %x
  ret void
}

define <8 x float> @insertelt_v8f32(<8 x float> %a, float %y, i32 %idx) {
; RV32-LABEL: insertelt_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
; RV32-NEXT:    vfmv.s.f v10, fa0
; RV32-NEXT:    addi a1, a0, 1
; RV32-NEXT:    vsetvli zero, a1, e32, m2, tu, ma
; RV32-NEXT:    vslideup.vx v8, v10, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
; RV64-NEXT:    vfmv.s.f v10, fa0
; RV64-NEXT:    slli a0, a0, 32
; RV64-NEXT:    srli a0, a0, 32
; RV64-NEXT:    addi a1, a0, 1
; RV64-NEXT:    vsetvli zero, a1, e32, m2, tu, ma
; RV64-NEXT:    vslideup.vx v8, v10, a0
; RV64-NEXT:    ret
  %b = insertelement <8 x float> %a, float %y, i32 %idx
  ret <8 x float> %b
}

define void @insertelt_v8f32_store(ptr %x, float %y, i32 %idx) {
; CHECK-LABEL: insertelt_v8f32_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andi a1, a1, 7
; CHECK-NEXT:    slli a1, a1, 2
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    fsw fa0, 0(a0)
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = insertelement <8 x float> %a, float %y, i32 %idx
  store <8 x float> %b, ptr %x
  ret void
}

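; The remaining integer tests insert the constants -1 and 6 rather than an
; argument, so the value is materialized with li for the scalar-move path or
; splatted directly with vmv.v.i for the slideup path.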
define <8 x i64> @insertelt_v8i64_0(<8 x i64> %a, ptr %x) {
; CHECK-LABEL: insertelt_v8i64_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vsetivli zero, 8, e64, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
  %b = insertelement <8 x i64> %a, i64 -1, i32 0
  ret <8 x i64> %b
}

define void @insertelt_v8i64_0_store(ptr %x) {
; RV32-LABEL: insertelt_v8i64_0_store:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, -1
; RV32-NEXT:    sw a1, 4(a0)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v8i64_0_store:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, -1
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
  %a = load <8 x i64>, ptr %x
  %b = insertelement <8 x i64> %a, i64 -1, i32 0
  store <8 x i64> %b, ptr %x
  ret void
}

define <8 x i64> @insertelt_v8i64(<8 x i64> %a, i32 %idx) {
; RV32-LABEL: insertelt_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
; RV32-NEXT:    vmv.v.i v12, -1
; RV32-NEXT:    addi a1, a0, 1
; RV32-NEXT:    vsetvli zero, a1, e64, m4, tu, ma
; RV32-NEXT:    vslideup.vx v8, v12, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
; RV64-NEXT:    vmv.v.i v12, -1
; RV64-NEXT:    slli a0, a0, 32
; RV64-NEXT:    srli a0, a0, 32
; RV64-NEXT:    addi a1, a0, 1
; RV64-NEXT:    vsetvli zero, a1, e64, m4, tu, ma
; RV64-NEXT:    vslideup.vx v8, v12, a0
; RV64-NEXT:    ret
  %b = insertelement <8 x i64> %a, i64 -1, i32 %idx
  ret <8 x i64> %b
}

define void @insertelt_v8i64_store(ptr %x, i32 %idx) {
; RV32-LABEL: insertelt_v8i64_store:
; RV32:       # %bb.0:
; RV32-NEXT:    andi a1, a1, 7
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    li a1, -1
; RV32-NEXT:    sw a1, 4(a0)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v8i64_store:
; RV64:       # %bb.0:
; RV64-NEXT:    andi a1, a1, 7
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    li a1, -1
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
  %a = load <8 x i64>, ptr %x
  %b = insertelement <8 x i64> %a, i64 -1, i32 %idx
  store <8 x i64> %b, ptr %x
  ret void
}

define <8 x i64> @insertelt_c6_v8i64_0(<8 x i64> %a, ptr %x) {
; CHECK-LABEL: insertelt_c6_v8i64_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 6
; CHECK-NEXT:    vsetivli zero, 8, e64, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
  %b = insertelement <8 x i64> %a, i64 6, i32 0
  ret <8 x i64> %b
}

define void @insertelt_c6_v8i64_0_store(ptr %x) {
; RV32-LABEL: insertelt_c6_v8i64_0_store:
; RV32:       # %bb.0:
; RV32-NEXT:    sw zero, 4(a0)
; RV32-NEXT:    li a1, 6
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_c6_v8i64_0_store:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 6
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
  %a = load <8 x i64>, ptr %x
  %b = insertelement <8 x i64> %a, i64 6, i32 0
  store <8 x i64> %b, ptr %x
  ret void
}

define <8 x i64> @insertelt_c6_v8i64(<8 x i64> %a, i32 %idx) {
; RV32-LABEL: insertelt_c6_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
; RV32-NEXT:    vmv.v.i v12, 6
; RV32-NEXT:    addi a1, a0, 1
; RV32-NEXT:    vsetvli zero, a1, e64, m4, tu, ma
; RV32-NEXT:    vslideup.vx v8, v12, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_c6_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
; RV64-NEXT:    vmv.v.i v12, 6
; RV64-NEXT:    slli a0, a0, 32
; RV64-NEXT:    srli a0, a0, 32
; RV64-NEXT:    addi a1, a0, 1
; RV64-NEXT:    vsetvli zero, a1, e64, m4, tu, ma
; RV64-NEXT:    vslideup.vx v8, v12, a0
; RV64-NEXT:    ret
  %b = insertelement <8 x i64> %a, i64 6, i32 %idx
  ret <8 x i64> %b
}

define void @insertelt_c6_v8i64_store(ptr %x, i32 %idx) {
; RV32-LABEL: insertelt_c6_v8i64_store:
; RV32:       # %bb.0:
; RV32-NEXT:    andi a1, a1, 7
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    sw zero, 4(a0)
; RV32-NEXT:    li a1, 6
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_c6_v8i64_store:
; RV64:       # %bb.0:
; RV64-NEXT:    andi a1, a1, 7
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    li a1, 6
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
  %a = load <8 x i64>, ptr %x
  %b = insertelement <8 x i64> %a, i64 6, i32 %idx
  store <8 x i64> %b, ptr %x
  ret void
}

; Test that using the result of an insertelement at element 0 in a later
; operation doesn't crash the compiler.
define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) {
; CHECK-LABEL: insertelt_c6_v8i64_0_add:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v12, (a1)
; CHECK-NEXT:    li a1, 6
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a1
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i64>, ptr %x
  %b = insertelement <8 x i64> %a, i64 6, i32 0
  %c = load <8 x i64>, ptr %y
  %d = add <8 x i64> %b, %c
  store <8 x i64> %d, ptr %x
  ret void
}

; The next batch of tests covers inserts into high-LMUL vectors when the
; exact VLEN is known. FIXME: These can directly access the sub-registers.

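; With vscale_range(2,2) VLEN is exactly 128 bits, so each vector register
; holds four i32 (or two i64) elements and a <16 x i32> occupies exactly the
; m4 group v8-v11; inserts into the upper lanes can therefore be done at m1
; directly on the sub-register that holds them (e.g. v11 for elements 12-15).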
define <16 x i32> @insertelt_c0_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c0_v16xi32_exact:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
  %v = insertelement <16 x i32> %vin, i32 %a, i32 0
  ret <16 x i32> %v
}

define <16 x i32> @insertelt_c1_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c1_v16xi32_exact:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v12, a0
; CHECK-NEXT:    vslideup.vi v8, v12, 1
; CHECK-NEXT:    ret
  %v = insertelement <16 x i32> %vin, i32 %a, i32 1
  ret <16 x i32> %v
}

define <16 x i32> @insertelt_c2_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c2_v16xi32_exact:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v12, a0
; CHECK-NEXT:    vslideup.vi v8, v12, 2
; CHECK-NEXT:    ret
  %v = insertelement <16 x i32> %vin, i32 %a, i32 2
  ret <16 x i32> %v
}

define <16 x i32> @insertelt_c3_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c3_v16xi32_exact:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v12, a0
; CHECK-NEXT:    vslideup.vi v8, v12, 3
; CHECK-NEXT:    ret
  %v = insertelement <16 x i32> %vin, i32 %a, i32 3
  ret <16 x i32> %v
}

define <16 x i32> @insertelt_c12_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c12_v16xi32_exact:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v11, a0
; CHECK-NEXT:    ret
  %v = insertelement <16 x i32> %vin, i32 %a, i32 12
  ret <16 x i32> %v
}

define <16 x i32> @insertelt_c13_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c13_v16xi32_exact:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v12, a0
; CHECK-NEXT:    vslideup.vi v11, v12, 1
; CHECK-NEXT:    ret
  %v = insertelement <16 x i32> %vin, i32 %a, i32 13
  ret <16 x i32> %v
}

define <16 x i32> @insertelt_c14_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c14_v16xi32_exact:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v12, a0
; CHECK-NEXT:    vslideup.vi v11, v12, 2
; CHECK-NEXT:    ret
  %v = insertelement <16 x i32> %vin, i32 %a, i32 14
  ret <16 x i32> %v
}

define <16 x i32> @insertelt_c15_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c15_v16xi32_exact:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v12, a0
; CHECK-NEXT:    vslideup.vi v11, v12, 3
; CHECK-NEXT:    ret
  %v = insertelement <16 x i32> %vin, i32 %a, i32 15
  ret <16 x i32> %v
}

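; For the exact-VLEN i64 cases RV32 receives the scalar in two GPRs (a0/a1)
; and assembles it with a pair of vslide1down.vx, whereas RV64 moves it in
; one piece with vmv.s.x.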
define <8 x i64> @insertelt_c4_v8xi64_exact(<8 x i64> %vin, i64 %a) vscale_range(2,2) {
; RV32-LABEL: insertelt_c4_v8xi64_exact:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_c4_v8xi64_exact:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m1, tu, ma
; RV64-NEXT:    vmv.s.x v10, a0
; RV64-NEXT:    ret
  %v = insertelement <8 x i64> %vin, i64 %a, i32 4
  ret <8 x i64> %v
}

define <8 x i64> @insertelt_c5_v8xi64_exact(<8 x i64> %vin, i64 %a) vscale_range(2,2) {
; RV32-LABEL: insertelt_c5_v8xi64_exact:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v12, v8, a0
; RV32-NEXT:    vslide1down.vx v12, v12, a1
; RV32-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
; RV32-NEXT:    vslideup.vi v10, v12, 1
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_c5_v8xi64_exact:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
; RV64-NEXT:    vmv.s.x v12, a0
; RV64-NEXT:    vslideup.vi v10, v12, 1
; RV64-NEXT:    ret
  %v = insertelement <8 x i64> %vin, i64 %a, i32 5
  ret <8 x i64> %v
}