; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVFH
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVFH
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVFHMIN,ZVFHMINRV32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVFHMIN,ZVFHMINRV64
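
; Tests insertelement lowering for fixed-length vectors: inserts at constant
; and variable indices, integer and floating-point element types, and the
; high-LMUL / exact-VLEN cases, checked on both RV32 and RV64.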

define <4 x i32> @insertelt_v4i32_0(<4 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v4i32_0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
  %b = insertelement <4 x i32> %a, i32 %y, i32 0

define <4 x i32> @insertelt_v4i32_3(<4 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v4i32_3:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vslideup.vi v8, v9, 3
  %b = insertelement <4 x i32> %a, i32 %y, i32 3

define <4 x i32> @insertelt_v4i32_idx(<4 x i32> %a, i32 %y, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_v4i32_idx:
; CHECK-NEXT: addi a2, a1, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
  %b = insertelement <4 x i32> %a, i32 %y, i32 %idx

define <32 x i32> @insertelt_v32i32_0(<32 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v32i32_0:
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
  %b = insertelement <32 x i32> %a, i32 %y, i32 0

define <32 x i32> @insertelt_v32i32_4(<32 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v32i32_4:
; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT: vmv.s.x v16, a0
; CHECK-NEXT: vslideup.vi v8, v16, 4
  %b = insertelement <32 x i32> %a, i32 %y, i32 4

define <32 x i32> @insertelt_v32i32_31(<32 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v32i32_31:
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv.s.x v16, a0
; CHECK-NEXT: vslideup.vi v8, v16, 31
  %b = insertelement <32 x i32> %a, i32 %y, i32 31

define <32 x i32> @insertelt_v32i32_idx(<32 x i32> %a, i32 %y, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_v32i32_idx:
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, a0
; CHECK-NEXT: addi a0, a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma
; CHECK-NEXT: vslideup.vx v8, v16, a1
  %b = insertelement <32 x i32> %a, i32 %y, i32 %idx

define <64 x i32> @insertelt_v64i32_0(<64 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v64i32_0:
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
  %b = insertelement <64 x i32> %a, i32 %y, i32 0

define <64 x i32> @insertelt_v64i32_63(<64 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v64i32_63:
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv.s.x v24, a0
; CHECK-NEXT: vslideup.vi v16, v24, 31
  %b = insertelement <64 x i32> %a, i32 %y, i32 63

define <64 x i32> @insertelt_v64i32_idx(<64 x i32> %a, i32 %y, i32 zeroext %idx) {
; RV32-LABEL: insertelt_v64i32_idx:
; RV32-NEXT: addi sp, sp, -384
; RV32-NEXT: .cfi_def_cfa_offset 384
; RV32-NEXT: sw ra, 380(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: addi s0, sp, 384
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: andi sp, sp, -128
; RV32-NEXT: andi a1, a1, 63
; RV32-NEXT: mv a2, sp
; RV32-NEXT: addi a3, sp, 128
; RV32-NEXT: li a4, 32
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma
; RV32-NEXT: vse32.v v16, (a3)
; RV32-NEXT: vse32.v v8, (a2)
; RV32-NEXT: add a1, a2, a1
; RV32-NEXT: sw a0, 0(a1)
; RV32-NEXT: vle32.v v8, (a2)
; RV32-NEXT: vle32.v v16, (a3)
; RV32-NEXT: addi sp, s0, -384
; RV32-NEXT: .cfi_def_cfa sp, 384
; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: .cfi_restore s0
; RV32-NEXT: addi sp, sp, 384
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV64-LABEL: insertelt_v64i32_idx:
; RV64-NEXT: addi sp, sp, -384
; RV64-NEXT: .cfi_def_cfa_offset 384
; RV64-NEXT: sd ra, 376(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, sp, 384
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: andi sp, sp, -128
; RV64-NEXT: andi a1, a1, 63
; RV64-NEXT: mv a2, sp
; RV64-NEXT: addi a3, sp, 128
; RV64-NEXT: li a4, 32
; RV64-NEXT: slli a1, a1, 2
; RV64-NEXT: vsetvli zero, a4, e32, m8, ta, ma
; RV64-NEXT: vse32.v v16, (a3)
; RV64-NEXT: vse32.v v8, (a2)
; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: sw a0, 0(a1)
; RV64-NEXT: vle32.v v8, (a2)
; RV64-NEXT: vle32.v v16, (a3)
; RV64-NEXT: addi sp, s0, -384
; RV64-NEXT: .cfi_def_cfa sp, 384
; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: .cfi_restore s0
; RV64-NEXT: addi sp, sp, 384
; RV64-NEXT: .cfi_def_cfa_offset 0
  %b = insertelement <64 x i32> %a, i32 %y, i32 %idx

; FIXME: This codegen needs to be improved. These tests previously hit an
; assertion failure while type legalizing the i64 type on RV32.

define <4 x i64> @insertelt_v4i64(<4 x i64> %a, i64 %y) {
; RV32-LABEL: insertelt_v4i64:
; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
; RV32-NEXT: vslide1down.vx v10, v8, a0
; RV32-NEXT: vslide1down.vx v10, v10, a1
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vslideup.vi v8, v10, 3
; RV64-LABEL: insertelt_v4i64:
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vslideup.vi v8, v10, 3
  %b = insertelement <4 x i64> %a, i64 %y, i32 3

define void @insertelt_v4i64_store(ptr %x, i64 %y) {
; RV32-LABEL: insertelt_v4i64_store:
; RV32-NEXT: sw a1, 24(a0)
; RV32-NEXT: sw a2, 28(a0)
; RV64-LABEL: insertelt_v4i64_store:
; RV64-NEXT: sd a1, 24(a0)
  %a = load <4 x i64>, ptr %x
  %b = insertelement <4 x i64> %a, i64 %y, i32 3
  store <4 x i64> %b, ptr %x

; This uses a non-power-of-2 type so that it isn't an MVT.
; The align keeps the type legalizer from using a 256-bit load, so we must
; split it. This exercises some operations that weren't supported for scalable
; vectors when this test was written.
define <3 x i64> @insertelt_v3i64(<3 x i64> %a, i64 %y) {
; RV32-LABEL: insertelt_v3i64:
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v9, v8, 3
; RV32-NEXT: vslidedown.vi v10, v8, 2
; RV32-NEXT: vmv.x.s a2, v8
; RV32-NEXT: vslidedown.vi v8, v8, 1
; RV32-NEXT: vmv.x.s a3, v9
; RV32-NEXT: vmv.x.s a4, v10
; RV32-NEXT: vmv.x.s a5, v8
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vmv.v.x v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a5
; RV32-NEXT: vslide1down.vx v8, v8, a4
; RV32-NEXT: vslide1down.vx v8, v8, a3
; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslidedown.vi v8, v8, 2
; RV64-LABEL: insertelt_v3i64:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vslidedown.vi v9, v8, 1
; RV64-NEXT: vmv.x.s a1, v8
; RV64-NEXT: vmv.x.s a2, v9
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vmv.v.x v8, a1
; RV64-NEXT: vslide1down.vx v8, v8, a2
; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: vslidedown.vi v8, v8, 1
  %b = insertelement <3 x i64> %a, i64 %y, i32 2

define void @insertelt_v3i64_store(ptr %x, i64 %y) {
; RV32-LABEL: insertelt_v3i64_store:
; RV32-NEXT: sw a1, 16(a0)
; RV32-NEXT: sw a2, 20(a0)
; RV64-LABEL: insertelt_v3i64_store:
; RV64-NEXT: sd a1, 16(a0)
  %a = load <3 x i64>, ptr %x, align 8
  %b = insertelement <3 x i64> %a, i64 %y, i32 2
  store <3 x i64> %b, ptr %x

define <16 x i8> @insertelt_v16i8(<16 x i8> %a, i8 %y) {
; CHECK-LABEL: insertelt_v16i8:
; CHECK-NEXT: vsetivli zero, 15, e8, m1, tu, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vslideup.vi v8, v9, 14
  %b = insertelement <16 x i8> %a, i8 %y, i32 14

define void @insertelt_v16i8_store(ptr %x, i8 %y) {
; CHECK-LABEL: insertelt_v16i8_store:
; CHECK-NEXT: sb a1, 14(a0)
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> %a, i8 %y, i32 14
  store <16 x i8> %b, ptr %x

define <32 x i16> @insertelt_v32i16(<32 x i16> %a, i16 %y, i32 %idx) {
; RV32-LABEL: insertelt_v32i16:
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; RV32-NEXT: vmv.s.x v12, a0
; RV32-NEXT: addi a0, a1, 1
; RV32-NEXT: vsetvli zero, a0, e16, m4, tu, ma
; RV32-NEXT: vslideup.vx v8, v12, a1
; RV64-LABEL: insertelt_v32i16:
; RV64-NEXT: li a2, 32
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, a0
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: addi a0, a1, 1
; RV64-NEXT: vsetvli zero, a0, e16, m4, tu, ma
; RV64-NEXT: vslideup.vx v8, v12, a1
  %b = insertelement <32 x i16> %a, i16 %y, i32 %idx

define void @insertelt_v32i16_store(ptr %x, i16 %y, i32 %idx) {
; CHECK-LABEL: insertelt_v32i16_store:
; CHECK-NEXT: andi a2, a2, 31
; CHECK-NEXT: slli a2, a2, 1
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: sh a1, 0(a0)
  %a = load <32 x i16>, ptr %x
  %b = insertelement <32 x i16> %a, i16 %y, i32 %idx
  store <32 x i16> %b, ptr %x

define <8 x float> @insertelt_v8f32(<8 x float> %a, float %y, i32 %idx) {
; RV32-LABEL: insertelt_v8f32:
; RV32-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV32-NEXT: vfmv.s.f v10, fa0
; RV32-NEXT: addi a1, a0, 1
; RV32-NEXT: vsetvli zero, a1, e32, m2, tu, ma
; RV32-NEXT: vslideup.vx v8, v10, a0
; RV64-LABEL: insertelt_v8f32:
; RV64-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV64-NEXT: vfmv.s.f v10, fa0
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: srli a0, a0, 32
; RV64-NEXT: addi a1, a0, 1
; RV64-NEXT: vsetvli zero, a1, e32, m2, tu, ma
; RV64-NEXT: vslideup.vx v8, v10, a0
  %b = insertelement <8 x float> %a, float %y, i32 %idx

define void @insertelt_v8f32_store(ptr %x, float %y, i32 %idx) {
; CHECK-LABEL: insertelt_v8f32_store:
; CHECK-NEXT: andi a1, a1, 7
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: fsw fa0, 0(a0)
  %a = load <8 x float>, ptr %x
  %b = insertelement <8 x float> %a, float %y, i32 %idx
  store <8 x float> %b, ptr %x

define <8 x i64> @insertelt_v8i64_0(<8 x i64> %a, ptr %x) {
; CHECK-LABEL: insertelt_v8i64_0:
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
  %b = insertelement <8 x i64> %a, i64 -1, i32 0

define void @insertelt_v8i64_0_store(ptr %x) {
; RV32-LABEL: insertelt_v8i64_0_store:
; RV32-NEXT: li a1, -1
; RV32-NEXT: sw a1, 0(a0)
; RV32-NEXT: sw a1, 4(a0)
; RV64-LABEL: insertelt_v8i64_0_store:
; RV64-NEXT: li a1, -1
; RV64-NEXT: sd a1, 0(a0)
  %a = load <8 x i64>, ptr %x
  %b = insertelement <8 x i64> %a, i64 -1, i32 0
  store <8 x i64> %b, ptr %x

define <8 x i64> @insertelt_v8i64(<8 x i64> %a, i32 %idx) {
; RV32-LABEL: insertelt_v8i64:
; RV32-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV32-NEXT: vmv.v.i v12, -1
; RV32-NEXT: addi a1, a0, 1
; RV32-NEXT: vsetvli zero, a1, e64, m4, tu, ma
; RV32-NEXT: vslideup.vx v8, v12, a0
; RV64-LABEL: insertelt_v8i64:
; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v12, -1
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: srli a0, a0, 32
; RV64-NEXT: addi a1, a0, 1
; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma
; RV64-NEXT: vslideup.vx v8, v12, a0
  %b = insertelement <8 x i64> %a, i64 -1, i32 %idx

define void @insertelt_v8i64_store(ptr %x, i32 %idx) {
; RV32-LABEL: insertelt_v8i64_store:
; RV32-NEXT: andi a1, a1, 7
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: li a1, -1
; RV32-NEXT: sw a1, 0(a0)
; RV32-NEXT: sw a1, 4(a0)
; RV64-LABEL: insertelt_v8i64_store:
; RV64-NEXT: andi a1, a1, 7
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: li a1, -1
; RV64-NEXT: sd a1, 0(a0)
  %a = load <8 x i64>, ptr %x
  %b = insertelement <8 x i64> %a, i64 -1, i32 %idx
  store <8 x i64> %b, ptr %x

define <8 x i64> @insertelt_c6_v8i64_0(<8 x i64> %a, ptr %x) {
; CHECK-LABEL: insertelt_c6_v8i64_0:
; CHECK-NEXT: li a0, 6
; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
  %b = insertelement <8 x i64> %a, i64 6, i32 0

define void @insertelt_c6_v8i64_0_store(ptr %x) {
; RV32-LABEL: insertelt_c6_v8i64_0_store:
; RV32-NEXT: li a1, 6
; RV32-NEXT: sw a1, 0(a0)
; RV32-NEXT: sw zero, 4(a0)
; RV64-LABEL: insertelt_c6_v8i64_0_store:
; RV64-NEXT: li a1, 6
; RV64-NEXT: sd a1, 0(a0)
  %a = load <8 x i64>, ptr %x
  %b = insertelement <8 x i64> %a, i64 6, i32 0
  store <8 x i64> %b, ptr %x

define <8 x i64> @insertelt_c6_v8i64(<8 x i64> %a, i32 %idx) {
; RV32-LABEL: insertelt_c6_v8i64:
; RV32-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV32-NEXT: vmv.v.i v12, 6
; RV32-NEXT: addi a1, a0, 1
; RV32-NEXT: vsetvli zero, a1, e64, m4, tu, ma
; RV32-NEXT: vslideup.vx v8, v12, a0
; RV64-LABEL: insertelt_c6_v8i64:
; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v12, 6
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: srli a0, a0, 32
; RV64-NEXT: addi a1, a0, 1
; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma
; RV64-NEXT: vslideup.vx v8, v12, a0
  %b = insertelement <8 x i64> %a, i64 6, i32 %idx

define void @insertelt_c6_v8i64_store(ptr %x, i32 %idx) {
; RV32-LABEL: insertelt_c6_v8i64_store:
; RV32-NEXT: andi a1, a1, 7
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: li a1, 6
; RV32-NEXT: sw a1, 0(a0)
; RV32-NEXT: sw zero, 4(a0)
; RV64-LABEL: insertelt_c6_v8i64_store:
; RV64-NEXT: andi a1, a1, 7
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: li a1, 6
; RV64-NEXT: sd a1, 0(a0)
  %a = load <8 x i64>, ptr %x
  %b = insertelement <8 x i64> %a, i64 6, i32 %idx
  store <8 x i64> %b, ptr %x

; Test that using the result of an insertelement at element 0 in a later
; operation doesn't crash the compiler.
define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) {
; CHECK-LABEL: insertelt_c6_v8i64_0_add:
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vle64.v v12, (a1)
; CHECK-NEXT: li a1, 6
; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma
; CHECK-NEXT: vmv.s.x v8, a1
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vadd.vv v8, v8, v12
; CHECK-NEXT: vse64.v v8, (a0)
  %a = load <8 x i64>, ptr %x
  %b = insertelement <8 x i64> %a, i64 6, i32 0
  %c = load <8 x i64>, ptr %y
  %d = add <8 x i64> %b, %c
  store <8 x i64> %d, ptr %x

; The next batch of tests covers inserts into high-LMUL vectors when the
; exact VLEN is known. FIXME: These can directly access the sub-registers.

define <16 x i32> @insertelt_c0_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c0_v16xi32_exact:
; CHECK-NEXT: vsetivli zero, 16, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
  %v = insertelement <16 x i32> %vin, i32 %a, i32 0

define <16 x i32> @insertelt_c1_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c1_v16xi32_exact:
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v12, a0
; CHECK-NEXT: vslideup.vi v8, v12, 1
  %v = insertelement <16 x i32> %vin, i32 %a, i32 1

define <16 x i32> @insertelt_c2_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c2_v16xi32_exact:
; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v12, a0
; CHECK-NEXT: vslideup.vi v8, v12, 2
  %v = insertelement <16 x i32> %vin, i32 %a, i32 2

define <16 x i32> @insertelt_c3_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c3_v16xi32_exact:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v12, a0
; CHECK-NEXT: vslideup.vi v8, v12, 3
  %v = insertelement <16 x i32> %vin, i32 %a, i32 3

define <16 x i32> @insertelt_c12_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c12_v16xi32_exact:
; CHECK-NEXT: vsetivli zero, 16, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v11, a0
  %v = insertelement <16 x i32> %vin, i32 %a, i32 12

define <16 x i32> @insertelt_c13_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c13_v16xi32_exact:
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v12, a0
; CHECK-NEXT: vslideup.vi v11, v12, 1
  %v = insertelement <16 x i32> %vin, i32 %a, i32 13

define <16 x i32> @insertelt_c14_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c14_v16xi32_exact:
; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v12, a0
; CHECK-NEXT: vslideup.vi v11, v12, 2
  %v = insertelement <16 x i32> %vin, i32 %a, i32 14

define <16 x i32> @insertelt_c15_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c15_v16xi32_exact:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v12, a0
; CHECK-NEXT: vslideup.vi v11, v12, 3
  %v = insertelement <16 x i32> %vin, i32 %a, i32 15

define <8 x i64> @insertelt_c4_v8xi64_exact(<8 x i64> %vin, i64 %a) vscale_range(2,2) {
; RV32-LABEL: insertelt_c4_v8xi64_exact:
; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vslide1down.vx v10, v10, a1
; RV64-LABEL: insertelt_c4_v8xi64_exact:
; RV64-NEXT: vsetivli zero, 8, e64, m1, tu, ma
; RV64-NEXT: vmv.s.x v10, a0
  %v = insertelement <8 x i64> %vin, i64 %a, i32 4

define <8 x i64> @insertelt_c5_v8xi64_exact(<8 x i64> %vin, i64 %a) vscale_range(2,2) {
; RV32-LABEL: insertelt_c5_v8xi64_exact:
; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT: vslide1down.vx v12, v8, a0
; RV32-NEXT: vslide1down.vx v12, v12, a1
; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma
; RV32-NEXT: vslideup.vi v10, v12, 1
; RV64-LABEL: insertelt_c5_v8xi64_exact:
; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma
; RV64-NEXT: vmv.s.x v12, a0
; RV64-NEXT: vslideup.vi v10, v12, 1
  %v = insertelement <8 x i64> %vin, i64 %a, i32 5

define <4 x bfloat> @insertelt_v4bf16_0(<4 x bfloat> %a, bfloat %y) {
; CHECK-LABEL: insertelt_v4bf16_0:
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
  %b = insertelement <4 x bfloat> %a, bfloat %y, i32 0

define <4 x bfloat> @insertelt_v4bf16_3(<4 x bfloat> %a, bfloat %y) {
; CHECK-LABEL: insertelt_v4bf16_3:
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vslideup.vi v8, v9, 3
  %b = insertelement <4 x bfloat> %a, bfloat %y, i32 3

define <4 x bfloat> @insertelt_v4bf16_idx(<4 x bfloat> %a, bfloat %y, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_v4bf16_idx:
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: fmv.x.h a2, fa0
; CHECK-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a2
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
; CHECK-NEXT: vslideup.vx v8, v9, a0
  %b = insertelement <4 x bfloat> %a, bfloat %y, i32 %idx

define <4 x half> @insertelt_v4f16_0(<4 x half> %a, half %y) {
; ZVFH-LABEL: insertelt_v4f16_0:
; ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; ZVFH-NEXT: vfmv.s.f v8, fa0
; ZVFHMIN-LABEL: insertelt_v4f16_0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; ZVFHMIN-NEXT: vmv.s.x v8, a0
  %b = insertelement <4 x half> %a, half %y, i32 0

define <4 x half> @insertelt_v4f16_3(<4 x half> %a, half %y) {
; ZVFH-LABEL: insertelt_v4f16_3:
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH-NEXT: vfmv.s.f v9, fa0
; ZVFH-NEXT: vslideup.vi v8, v9, 3
; ZVFHMIN-LABEL: insertelt_v4f16_3:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vmv.s.x v9, a0
; ZVFHMIN-NEXT: vslideup.vi v8, v9, 3
  %b = insertelement <4 x half> %a, half %y, i32 3

define <4 x half> @insertelt_v4f16_idx(<4 x half> %a, half %y, i32 zeroext %idx) {
; ZVFH-LABEL: insertelt_v4f16_idx:
; ZVFH-NEXT: addi a1, a0, 1
; ZVFH-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; ZVFH-NEXT: vfmv.s.f v9, fa0
; ZVFH-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
; ZVFH-NEXT: vslideup.vx v8, v9, a0
; ZVFHMIN-LABEL: insertelt_v4f16_idx:
; ZVFHMIN-NEXT: addi a1, a0, 1
; ZVFHMIN-NEXT: fmv.x.h a2, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv.s.x v9, a2
; ZVFHMIN-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
; ZVFHMIN-NEXT: vslideup.vx v8, v9, a0
  %b = insertelement <4 x half> %a, half %y, i32 %idx
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; ZVFHMINRV32: {{.*}}
; ZVFHMINRV64: {{.*}}