1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
3 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
4 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
6 ; Tests that a floating-point build_vector doesn't try to generate a VID sequence.
8 define void @buildvec_no_vid_v4f32(<4 x float>* %x) {
9 ; CHECK-LABEL: buildvec_no_vid_v4f32:
11 ; CHECK-NEXT: lui a1, %hi(.LCPI0_0)
12 ; CHECK-NEXT: addi a1, a1, %lo(.LCPI0_0)
13 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14 ; CHECK-NEXT: vle32.v v8, (a1)
15 ; CHECK-NEXT: vse32.v v8, (a0)
17 store <4 x float> <float 0.0, float 4.0, float 0.0, float 2.0>, <4 x float>* %x
21 ; Not all BUILD_VECTORs are successfully lowered by the backend: some are
22 ; expanded into scalarized stack stores. However, this may result in an
23 ; infinite loop in the DAGCombiner which tries to recombine those stores into a
24 ; BUILD_VECTOR followed by a vector store. The BUILD_VECTOR is then expanded
25 ; and the loop begins.
26 ; Until all BUILD_VECTORs are lowered, we disable store-combining after
27 ; legalization for fixed-length vectors.
28 ; This test uses a trick with a shufflevector which can't be lowered to a
29 ; SHUFFLE_VECTOR node; the mask is shorter than the source vectors and the
30 ; shuffle indices aren't located within the same 4-element subvector, so it is
31 ; expanded to 4 EXTRACT_VECTOR_ELTs and a BUILD_VECTOR. This then triggers the
32 ; infinite loop when the BUILD_VECTOR is expanded.
33 define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x, <8 x float> %y) optsize {
34 ; CHECK-LABEL: hang_when_merging_stores_after_legalization:
36 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
37 ; CHECK-NEXT: vid.v v12
38 ; CHECK-NEXT: li a0, 7
39 ; CHECK-NEXT: vmul.vx v14, v12, a0
40 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
41 ; CHECK-NEXT: vrgatherei16.vv v12, v8, v14
42 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
43 ; CHECK-NEXT: vadd.vi v8, v14, -14
44 ; CHECK-NEXT: vmv.v.i v0, 12
45 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
46 ; CHECK-NEXT: vrgatherei16.vv v12, v10, v8, v0.t
47 ; CHECK-NEXT: vmv1r.v v8, v12
49 %z = shufflevector <8 x float> %x, <8 x float> %y, <4 x i32> <i32 0, i32 7, i32 8, i32 15>
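; Sequences like <0.0, 1.0> and <1.0, 2.0> can be built with vid.v (plus an add)
; and an integer-to-FP convert instead of a constant-pool load.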
53 define void @buildvec_dominant0_v2f32(<2 x float>* %x) {
54 ; CHECK-LABEL: buildvec_dominant0_v2f32:
56 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
57 ; CHECK-NEXT: vid.v v8
58 ; CHECK-NEXT: vfcvt.f.x.v v8, v8
59 ; CHECK-NEXT: vse32.v v8, (a0)
61 store <2 x float> <float 0.0, float 1.0>, <2 x float>* %x
65 ; We don't want to lower this to the insertion of two scalar elements as above,
66 ; as each would require its own load from the constant pool.
68 define void @buildvec_dominant1_v2f32(<2 x float>* %x) {
69 ; CHECK-LABEL: buildvec_dominant1_v2f32:
71 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
72 ; CHECK-NEXT: vid.v v8
73 ; CHECK-NEXT: vadd.vi v8, v8, 1
74 ; CHECK-NEXT: vfcvt.f.x.v v8, v8
75 ; CHECK-NEXT: vse32.v v8, (a0)
77 store <2 x float> <float 1.0, float 2.0>, <2 x float>* %x
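; When one value dominates the build_vector, splat it first and patch in the
; remaining elements, either with vmv.s.x/vslideup.vi or with a masked vmerge.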
81 define void @buildvec_dominant0_v4f32(<4 x float>* %x) {
82 ; CHECK-LABEL: buildvec_dominant0_v4f32:
84 ; CHECK-NEXT: lui a1, 262144
85 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
86 ; CHECK-NEXT: vmv.v.x v8, a1
87 ; CHECK-NEXT: vmv.s.x v9, zero
88 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
89 ; CHECK-NEXT: vslideup.vi v8, v9, 2
90 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
91 ; CHECK-NEXT: vse32.v v8, (a0)
93 store <4 x float> <float 2.0, float 2.0, float 0.0, float 2.0>, <4 x float>* %x
97 define void @buildvec_dominant1_v4f32(<4 x float>* %x, float %f) {
98 ; CHECK-LABEL: buildvec_dominant1_v4f32:
100 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
101 ; CHECK-NEXT: vfmv.v.f v8, fa0
102 ; CHECK-NEXT: vmv.s.x v9, zero
103 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
104 ; CHECK-NEXT: vslideup.vi v8, v9, 1
105 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
106 ; CHECK-NEXT: vse32.v v8, (a0)
108 %v0 = insertelement <4 x float> poison, float %f, i32 0
109 %v1 = insertelement <4 x float> %v0, float 0.0, i32 1
110 %v2 = insertelement <4 x float> %v1, float %f, i32 2
111 %v3 = insertelement <4 x float> %v2, float %f, i32 3
112 store <4 x float> %v3, <4 x float>* %x
116 define void @buildvec_dominant2_v4f32(<4 x float>* %x, float %f) {
117 ; CHECK-LABEL: buildvec_dominant2_v4f32:
119 ; CHECK-NEXT: lui a1, 262144
120 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
121 ; CHECK-NEXT: vmv.s.x v8, a1
122 ; CHECK-NEXT: vfmv.v.f v9, fa0
123 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
124 ; CHECK-NEXT: vslideup.vi v9, v8, 1
125 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
126 ; CHECK-NEXT: vse32.v v9, (a0)
128 %v0 = insertelement <4 x float> poison, float %f, i32 0
129 %v1 = insertelement <4 x float> %v0, float 2.0, i32 1
130 %v2 = insertelement <4 x float> %v1, float %f, i32 2
131 %v3 = insertelement <4 x float> %v2, float %f, i32 3
132 store <4 x float> %v3, <4 x float>* %x
136 define void @buildvec_merge0_v4f32(<4 x float>* %x, float %f) {
137 ; CHECK-LABEL: buildvec_merge0_v4f32:
139 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
140 ; CHECK-NEXT: vfmv.v.f v8, fa0
141 ; CHECK-NEXT: vmv.v.i v0, 6
142 ; CHECK-NEXT: lui a1, 262144
143 ; CHECK-NEXT: vmerge.vxm v8, v8, a1, v0
144 ; CHECK-NEXT: vse32.v v8, (a0)
146 %v0 = insertelement <4 x float> poison, float %f, i32 0
147 %v1 = insertelement <4 x float> %v0, float 2.0, i32 1
148 %v2 = insertelement <4 x float> %v1, float 2.0, i32 2
149 %v3 = insertelement <4 x float> %v2, float %f, i32 3
150 store <4 x float> %v3, <4 x float>* %x
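; Splats of an extracted element should lower to a single vrgather.vi (constant
; index) or vrgather.vx (variable index) rather than an extract followed by a
; scalar splat.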
154 define <4 x half> @splat_c3_v4f16(<4 x half> %v) {
155 ; CHECK-LABEL: splat_c3_v4f16:
157 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
158 ; CHECK-NEXT: vrgather.vi v9, v8, 3
159 ; CHECK-NEXT: vmv1r.v v8, v9
161 %x = extractelement <4 x half> %v, i32 3
162 %ins = insertelement <4 x half> poison, half %x, i32 0
163 %splat = shufflevector <4 x half> %ins, <4 x half> poison, <4 x i32> zeroinitializer
164 ret <4 x half> %splat
167 define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) {
168 ; CHECK-LABEL: splat_idx_v4f16:
170 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
171 ; CHECK-NEXT: vrgather.vx v9, v8, a0
172 ; CHECK-NEXT: vmv1r.v v8, v9
174 %x = extractelement <4 x half> %v, i64 %idx
175 %ins = insertelement <4 x half> poison, half %x, i32 0
176 %splat = shufflevector <4 x half> %ins, <4 x half> poison, <4 x i32> zeroinitializer
177 ret <4 x half> %splat
180 define <8 x float> @splat_c5_v8f32(<8 x float> %v) {
181 ; CHECK-LABEL: splat_c5_v8f32:
183 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
184 ; CHECK-NEXT: vrgather.vi v10, v8, 5
185 ; CHECK-NEXT: vmv.v.v v8, v10
187 %x = extractelement <8 x float> %v, i32 5
188 %ins = insertelement <8 x float> poison, float %x, i32 0
189 %splat = shufflevector <8 x float> %ins, <8 x float> poison, <8 x i32> zeroinitializer
190 ret <8 x float> %splat
193 define <8 x float> @splat_idx_v8f32(<8 x float> %v, i64 %idx) {
195 ; CHECK-LABEL: splat_idx_v8f32:
197 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
198 ; CHECK-NEXT: vrgather.vx v10, v8, a0
199 ; CHECK-NEXT: vmv.v.v v8, v10
201 %x = extractelement <8 x float> %v, i64 %idx
202 %ins = insertelement <8 x float> poison, float %x, i32 0
203 %splat = shufflevector <8 x float> %ins, <8 x float> poison, <8 x i32> zeroinitializer
204 ret <8 x float> %splat
207 ; Test that we pull the vlse of the constant pool value out of the loop.
208 define dso_local void @splat_load_licm(float* %0) {
209 ; CHECK-LABEL: splat_load_licm:
211 ; CHECK-NEXT: lui a1, 1
212 ; CHECK-NEXT: add a1, a0, a1
213 ; CHECK-NEXT: lui a2, 263168
214 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
215 ; CHECK-NEXT: vmv.v.x v8, a2
216 ; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
217 ; CHECK-NEXT: vse32.v v8, (a0)
218 ; CHECK-NEXT: addi a0, a0, 16
219 ; CHECK-NEXT: bne a0, a1, .LBB12_1
220 ; CHECK-NEXT: # %bb.2:
225 %3 = phi i32 [ 0, %1 ], [ %6, %2 ]
226 %4 = getelementptr inbounds float, float* %0, i32 %3
227 %5 = bitcast float* %4 to <4 x float>*
228 store <4 x float> <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>, <4 x float>* %5, align 4
229 %6 = add nuw i32 %3, 4
230 %7 = icmp eq i32 %6, 1024
231 br i1 %7, label %8, label %2
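; Build vectors of scalar FP arguments are assembled in-register: vfmv.v.f for
; the first element followed by one vfslide1down.vf per remaining element.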
237 define <2 x half> @buildvec_v2f16(half %a, half %b) {
238 ; CHECK-LABEL: buildvec_v2f16:
240 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
241 ; CHECK-NEXT: vfmv.v.f v8, fa0
242 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
244 %v1 = insertelement <2 x half> poison, half %a, i64 0
245 %v2 = insertelement <2 x half> %v1, half %b, i64 1
249 define <2 x float> @buildvec_v2f32(float %a, float %b) {
250 ; CHECK-LABEL: buildvec_v2f32:
252 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
253 ; CHECK-NEXT: vfmv.v.f v8, fa0
254 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
256 %v1 = insertelement <2 x float> poison, float %a, i64 0
257 %v2 = insertelement <2 x float> %v1, float %b, i64 1
261 define <2 x double> @buildvec_v2f64(double %a, double %b) {
262 ; CHECK-LABEL: buildvec_v2f64:
264 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
265 ; CHECK-NEXT: vfmv.v.f v8, fa0
266 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
268 %v1 = insertelement <2 x double> poison, double %a, i64 0
269 %v2 = insertelement <2 x double> %v1, double %b, i64 1
273 define <2 x double> @buildvec_v2f64_b(double %a, double %b) {
274 ; CHECK-LABEL: buildvec_v2f64_b:
276 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
277 ; CHECK-NEXT: vfmv.v.f v8, fa0
278 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
280 %v1 = insertelement <2 x double> poison, double %b, i64 1
281 %v2 = insertelement <2 x double> %v1, double %a, i64 0
285 define <4 x float> @buildvec_v4f32(float %a, float %b, float %c, float %d) {
286 ; CHECK-LABEL: buildvec_v4f32:
288 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
289 ; CHECK-NEXT: vfmv.v.f v8, fa0
290 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
291 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
292 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
294 %v1 = insertelement <4 x float> poison, float %a, i64 0
295 %v2 = insertelement <4 x float> %v1, float %b, i64 1
296 %v3 = insertelement <4 x float> %v2, float %c, i64 2
297 %v4 = insertelement <4 x float> %v3, float %d, i64 3
301 define <8 x float> @buildvec_v8f32(float %e0, float %e1, float %e2, float %e3, float %e4, float %e5, float %e6, float %e7) {
302 ; CHECK-LABEL: buildvec_v8f32:
304 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
305 ; CHECK-NEXT: vfmv.v.f v8, fa0
306 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
307 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
308 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
309 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
310 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
311 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
312 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
314 %v0 = insertelement <8 x float> poison, float %e0, i64 0
315 %v1 = insertelement <8 x float> %v0, float %e1, i64 1
316 %v2 = insertelement <8 x float> %v1, float %e2, i64 2
317 %v3 = insertelement <8 x float> %v2, float %e3, i64 3
318 %v4 = insertelement <8 x float> %v3, float %e4, i64 4
319 %v5 = insertelement <8 x float> %v4, float %e5, i64 5
320 %v6 = insertelement <8 x float> %v5, float %e6, i64 6
321 %v7 = insertelement <8 x float> %v6, float %e7, i64 7
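; Larger build_vectors are currently materialized through an aligned stack
; temporary and reloaded with a single unit-stride vector load (vle32/vle64).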
325 define <16 x float> @buildvec_v16f32(float %e0, float %e1, float %e2, float %e3, float %e4, float %e5, float %e6, float %e7, float %e8, float %e9, float %e10, float %e11, float %e12, float %e13, float %e14, float %e15) {
326 ; RV32-LABEL: buildvec_v16f32:
328 ; RV32-NEXT: addi sp, sp, -128
329 ; RV32-NEXT: .cfi_def_cfa_offset 128
330 ; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
331 ; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
332 ; RV32-NEXT: .cfi_offset ra, -4
333 ; RV32-NEXT: .cfi_offset s0, -8
334 ; RV32-NEXT: addi s0, sp, 128
335 ; RV32-NEXT: .cfi_def_cfa s0, 0
336 ; RV32-NEXT: andi sp, sp, -64
337 ; RV32-NEXT: sw a7, 60(sp)
338 ; RV32-NEXT: sw a6, 56(sp)
339 ; RV32-NEXT: sw a5, 52(sp)
340 ; RV32-NEXT: sw a4, 48(sp)
341 ; RV32-NEXT: sw a3, 44(sp)
342 ; RV32-NEXT: sw a2, 40(sp)
343 ; RV32-NEXT: sw a1, 36(sp)
344 ; RV32-NEXT: sw a0, 32(sp)
345 ; RV32-NEXT: fsw fa7, 28(sp)
346 ; RV32-NEXT: fsw fa6, 24(sp)
347 ; RV32-NEXT: fsw fa5, 20(sp)
348 ; RV32-NEXT: fsw fa4, 16(sp)
349 ; RV32-NEXT: fsw fa3, 12(sp)
350 ; RV32-NEXT: fsw fa2, 8(sp)
351 ; RV32-NEXT: fsw fa1, 4(sp)
352 ; RV32-NEXT: fsw fa0, 0(sp)
353 ; RV32-NEXT: mv a0, sp
354 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
355 ; RV32-NEXT: vle32.v v8, (a0)
356 ; RV32-NEXT: addi sp, s0, -128
357 ; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
358 ; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
359 ; RV32-NEXT: addi sp, sp, 128
362 ; RV64-LABEL: buildvec_v16f32:
364 ; RV64-NEXT: addi sp, sp, -128
365 ; RV64-NEXT: .cfi_def_cfa_offset 128
366 ; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
367 ; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
368 ; RV64-NEXT: .cfi_offset ra, -8
369 ; RV64-NEXT: .cfi_offset s0, -16
370 ; RV64-NEXT: addi s0, sp, 128
371 ; RV64-NEXT: .cfi_def_cfa s0, 0
372 ; RV64-NEXT: andi sp, sp, -64
373 ; RV64-NEXT: fmv.w.x ft0, a0
374 ; RV64-NEXT: fmv.w.x ft1, a1
375 ; RV64-NEXT: fmv.w.x ft2, a2
376 ; RV64-NEXT: fmv.w.x ft3, a3
377 ; RV64-NEXT: fmv.w.x ft4, a4
378 ; RV64-NEXT: fmv.w.x ft5, a5
379 ; RV64-NEXT: fmv.w.x ft6, a6
380 ; RV64-NEXT: fmv.w.x ft7, a7
381 ; RV64-NEXT: fsw fa7, 28(sp)
382 ; RV64-NEXT: fsw fa6, 24(sp)
383 ; RV64-NEXT: fsw fa5, 20(sp)
384 ; RV64-NEXT: fsw fa4, 16(sp)
385 ; RV64-NEXT: fsw fa3, 12(sp)
386 ; RV64-NEXT: fsw fa2, 8(sp)
387 ; RV64-NEXT: fsw fa1, 4(sp)
388 ; RV64-NEXT: fsw fa0, 0(sp)
389 ; RV64-NEXT: fsw ft7, 60(sp)
390 ; RV64-NEXT: fsw ft6, 56(sp)
391 ; RV64-NEXT: fsw ft5, 52(sp)
392 ; RV64-NEXT: fsw ft4, 48(sp)
393 ; RV64-NEXT: fsw ft3, 44(sp)
394 ; RV64-NEXT: fsw ft2, 40(sp)
395 ; RV64-NEXT: fsw ft1, 36(sp)
396 ; RV64-NEXT: fsw ft0, 32(sp)
397 ; RV64-NEXT: mv a0, sp
398 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
399 ; RV64-NEXT: vle32.v v8, (a0)
400 ; RV64-NEXT: addi sp, s0, -128
401 ; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
402 ; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
403 ; RV64-NEXT: addi sp, sp, 128
405 %v0 = insertelement <16 x float> poison, float %e0, i64 0
406 %v1 = insertelement <16 x float> %v0, float %e1, i64 1
407 %v2 = insertelement <16 x float> %v1, float %e2, i64 2
408 %v3 = insertelement <16 x float> %v2, float %e3, i64 3
409 %v4 = insertelement <16 x float> %v3, float %e4, i64 4
410 %v5 = insertelement <16 x float> %v4, float %e5, i64 5
411 %v6 = insertelement <16 x float> %v5, float %e6, i64 6
412 %v7 = insertelement <16 x float> %v6, float %e7, i64 7
413 %v8 = insertelement <16 x float> %v7, float %e8, i64 8
414 %v9 = insertelement <16 x float> %v8, float %e9, i64 9
415 %v10 = insertelement <16 x float> %v9, float %e10, i64 10
416 %v11 = insertelement <16 x float> %v10, float %e11, i64 11
417 %v12 = insertelement <16 x float> %v11, float %e12, i64 12
418 %v13 = insertelement <16 x float> %v12, float %e13, i64 13
419 %v14 = insertelement <16 x float> %v13, float %e14, i64 14
420 %v15 = insertelement <16 x float> %v14, float %e15, i64 15
421 ret <16 x float> %v15
424 define <32 x float> @buildvec_v32f32(float %e0, float %e1, float %e2, float %e3, float %e4, float %e5, float %e6, float %e7, float %e8, float %e9, float %e10, float %e11, float %e12, float %e13, float %e14, float %e15, float %e16, float %e17, float %e18, float %e19, float %e20, float %e21, float %e22, float %e23, float %e24, float %e25, float %e26, float %e27, float %e28, float %e29, float %e30, float %e31) {
425 ; RV32-LABEL: buildvec_v32f32:
427 ; RV32-NEXT: addi sp, sp, -256
428 ; RV32-NEXT: .cfi_def_cfa_offset 256
429 ; RV32-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
430 ; RV32-NEXT: sw s0, 248(sp) # 4-byte Folded Spill
431 ; RV32-NEXT: fsd fs0, 240(sp) # 8-byte Folded Spill
432 ; RV32-NEXT: fsd fs1, 232(sp) # 8-byte Folded Spill
433 ; RV32-NEXT: fsd fs2, 224(sp) # 8-byte Folded Spill
434 ; RV32-NEXT: fsd fs3, 216(sp) # 8-byte Folded Spill
435 ; RV32-NEXT: .cfi_offset ra, -4
436 ; RV32-NEXT: .cfi_offset s0, -8
437 ; RV32-NEXT: .cfi_offset fs0, -16
438 ; RV32-NEXT: .cfi_offset fs1, -24
439 ; RV32-NEXT: .cfi_offset fs2, -32
440 ; RV32-NEXT: .cfi_offset fs3, -40
441 ; RV32-NEXT: addi s0, sp, 256
442 ; RV32-NEXT: .cfi_def_cfa s0, 0
443 ; RV32-NEXT: andi sp, sp, -128
444 ; RV32-NEXT: flw ft0, 0(s0)
445 ; RV32-NEXT: flw ft1, 4(s0)
446 ; RV32-NEXT: flw ft2, 8(s0)
447 ; RV32-NEXT: flw ft3, 12(s0)
448 ; RV32-NEXT: flw ft4, 16(s0)
449 ; RV32-NEXT: flw ft5, 20(s0)
450 ; RV32-NEXT: flw ft6, 24(s0)
451 ; RV32-NEXT: flw ft7, 28(s0)
452 ; RV32-NEXT: flw ft8, 32(s0)
453 ; RV32-NEXT: flw ft9, 36(s0)
454 ; RV32-NEXT: flw ft10, 40(s0)
455 ; RV32-NEXT: flw ft11, 44(s0)
456 ; RV32-NEXT: flw fs0, 60(s0)
457 ; RV32-NEXT: flw fs1, 56(s0)
458 ; RV32-NEXT: flw fs2, 52(s0)
459 ; RV32-NEXT: flw fs3, 48(s0)
460 ; RV32-NEXT: fsw fs0, 124(sp)
461 ; RV32-NEXT: fsw fs1, 120(sp)
462 ; RV32-NEXT: fsw fs2, 116(sp)
463 ; RV32-NEXT: fsw fs3, 112(sp)
464 ; RV32-NEXT: fsw ft11, 108(sp)
465 ; RV32-NEXT: fsw ft10, 104(sp)
466 ; RV32-NEXT: fsw ft9, 100(sp)
467 ; RV32-NEXT: fsw ft8, 96(sp)
468 ; RV32-NEXT: fsw ft7, 92(sp)
469 ; RV32-NEXT: fsw ft6, 88(sp)
470 ; RV32-NEXT: fsw ft5, 84(sp)
471 ; RV32-NEXT: fsw ft4, 80(sp)
472 ; RV32-NEXT: fsw ft3, 76(sp)
473 ; RV32-NEXT: fsw ft2, 72(sp)
474 ; RV32-NEXT: fsw ft1, 68(sp)
475 ; RV32-NEXT: fsw ft0, 64(sp)
476 ; RV32-NEXT: sw a7, 60(sp)
477 ; RV32-NEXT: sw a6, 56(sp)
478 ; RV32-NEXT: sw a5, 52(sp)
479 ; RV32-NEXT: sw a4, 48(sp)
480 ; RV32-NEXT: sw a3, 44(sp)
481 ; RV32-NEXT: sw a2, 40(sp)
482 ; RV32-NEXT: sw a1, 36(sp)
483 ; RV32-NEXT: sw a0, 32(sp)
484 ; RV32-NEXT: fsw fa7, 28(sp)
485 ; RV32-NEXT: fsw fa6, 24(sp)
486 ; RV32-NEXT: fsw fa5, 20(sp)
487 ; RV32-NEXT: fsw fa4, 16(sp)
488 ; RV32-NEXT: fsw fa3, 12(sp)
489 ; RV32-NEXT: fsw fa2, 8(sp)
490 ; RV32-NEXT: fsw fa1, 4(sp)
491 ; RV32-NEXT: fsw fa0, 0(sp)
492 ; RV32-NEXT: li a0, 32
493 ; RV32-NEXT: mv a1, sp
494 ; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
495 ; RV32-NEXT: vle32.v v8, (a1)
496 ; RV32-NEXT: addi sp, s0, -256
497 ; RV32-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
498 ; RV32-NEXT: lw s0, 248(sp) # 4-byte Folded Reload
499 ; RV32-NEXT: fld fs0, 240(sp) # 8-byte Folded Reload
500 ; RV32-NEXT: fld fs1, 232(sp) # 8-byte Folded Reload
501 ; RV32-NEXT: fld fs2, 224(sp) # 8-byte Folded Reload
502 ; RV32-NEXT: fld fs3, 216(sp) # 8-byte Folded Reload
503 ; RV32-NEXT: addi sp, sp, 256
506 ; RV64-LABEL: buildvec_v32f32:
508 ; RV64-NEXT: addi sp, sp, -256
509 ; RV64-NEXT: .cfi_def_cfa_offset 256
510 ; RV64-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
511 ; RV64-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
512 ; RV64-NEXT: fsd fs0, 232(sp) # 8-byte Folded Spill
513 ; RV64-NEXT: fsd fs1, 224(sp) # 8-byte Folded Spill
514 ; RV64-NEXT: fsd fs2, 216(sp) # 8-byte Folded Spill
515 ; RV64-NEXT: fsd fs3, 208(sp) # 8-byte Folded Spill
516 ; RV64-NEXT: fsd fs4, 200(sp) # 8-byte Folded Spill
517 ; RV64-NEXT: fsd fs5, 192(sp) # 8-byte Folded Spill
518 ; RV64-NEXT: fsd fs6, 184(sp) # 8-byte Folded Spill
519 ; RV64-NEXT: fsd fs7, 176(sp) # 8-byte Folded Spill
520 ; RV64-NEXT: fsd fs8, 168(sp) # 8-byte Folded Spill
521 ; RV64-NEXT: fsd fs9, 160(sp) # 8-byte Folded Spill
522 ; RV64-NEXT: fsd fs10, 152(sp) # 8-byte Folded Spill
523 ; RV64-NEXT: fsd fs11, 144(sp) # 8-byte Folded Spill
524 ; RV64-NEXT: .cfi_offset ra, -8
525 ; RV64-NEXT: .cfi_offset s0, -16
526 ; RV64-NEXT: .cfi_offset fs0, -24
527 ; RV64-NEXT: .cfi_offset fs1, -32
528 ; RV64-NEXT: .cfi_offset fs2, -40
529 ; RV64-NEXT: .cfi_offset fs3, -48
530 ; RV64-NEXT: .cfi_offset fs4, -56
531 ; RV64-NEXT: .cfi_offset fs5, -64
532 ; RV64-NEXT: .cfi_offset fs6, -72
533 ; RV64-NEXT: .cfi_offset fs7, -80
534 ; RV64-NEXT: .cfi_offset fs8, -88
535 ; RV64-NEXT: .cfi_offset fs9, -96
536 ; RV64-NEXT: .cfi_offset fs10, -104
537 ; RV64-NEXT: .cfi_offset fs11, -112
538 ; RV64-NEXT: addi s0, sp, 256
539 ; RV64-NEXT: .cfi_def_cfa s0, 0
540 ; RV64-NEXT: andi sp, sp, -128
541 ; RV64-NEXT: fmv.w.x ft0, a0
542 ; RV64-NEXT: fmv.w.x ft1, a1
543 ; RV64-NEXT: fmv.w.x ft2, a2
544 ; RV64-NEXT: fmv.w.x ft3, a3
545 ; RV64-NEXT: fmv.w.x ft4, a4
546 ; RV64-NEXT: fmv.w.x ft5, a5
547 ; RV64-NEXT: fmv.w.x ft6, a6
548 ; RV64-NEXT: fmv.w.x ft7, a7
549 ; RV64-NEXT: flw ft8, 0(s0)
550 ; RV64-NEXT: flw ft9, 8(s0)
551 ; RV64-NEXT: flw ft10, 16(s0)
552 ; RV64-NEXT: flw ft11, 24(s0)
553 ; RV64-NEXT: flw fs0, 32(s0)
554 ; RV64-NEXT: flw fs1, 40(s0)
555 ; RV64-NEXT: flw fs2, 48(s0)
556 ; RV64-NEXT: flw fs3, 56(s0)
557 ; RV64-NEXT: flw fs4, 64(s0)
558 ; RV64-NEXT: flw fs5, 72(s0)
559 ; RV64-NEXT: flw fs6, 80(s0)
560 ; RV64-NEXT: flw fs7, 88(s0)
561 ; RV64-NEXT: flw fs8, 120(s0)
562 ; RV64-NEXT: flw fs9, 112(s0)
563 ; RV64-NEXT: flw fs10, 104(s0)
564 ; RV64-NEXT: flw fs11, 96(s0)
565 ; RV64-NEXT: fsw fs8, 124(sp)
566 ; RV64-NEXT: fsw fs9, 120(sp)
567 ; RV64-NEXT: fsw fs10, 116(sp)
568 ; RV64-NEXT: fsw fs11, 112(sp)
569 ; RV64-NEXT: fsw fs7, 108(sp)
570 ; RV64-NEXT: fsw fs6, 104(sp)
571 ; RV64-NEXT: fsw fs5, 100(sp)
572 ; RV64-NEXT: fsw fs4, 96(sp)
573 ; RV64-NEXT: fsw fs3, 92(sp)
574 ; RV64-NEXT: fsw fs2, 88(sp)
575 ; RV64-NEXT: fsw fs1, 84(sp)
576 ; RV64-NEXT: fsw fs0, 80(sp)
577 ; RV64-NEXT: fsw ft11, 76(sp)
578 ; RV64-NEXT: fsw ft10, 72(sp)
579 ; RV64-NEXT: fsw ft9, 68(sp)
580 ; RV64-NEXT: fsw ft8, 64(sp)
581 ; RV64-NEXT: fsw fa7, 28(sp)
582 ; RV64-NEXT: fsw fa6, 24(sp)
583 ; RV64-NEXT: fsw fa5, 20(sp)
584 ; RV64-NEXT: fsw fa4, 16(sp)
585 ; RV64-NEXT: fsw fa3, 12(sp)
586 ; RV64-NEXT: fsw fa2, 8(sp)
587 ; RV64-NEXT: fsw fa1, 4(sp)
588 ; RV64-NEXT: fsw fa0, 0(sp)
589 ; RV64-NEXT: fsw ft7, 60(sp)
590 ; RV64-NEXT: fsw ft6, 56(sp)
591 ; RV64-NEXT: fsw ft5, 52(sp)
592 ; RV64-NEXT: fsw ft4, 48(sp)
593 ; RV64-NEXT: fsw ft3, 44(sp)
594 ; RV64-NEXT: fsw ft2, 40(sp)
595 ; RV64-NEXT: fsw ft1, 36(sp)
596 ; RV64-NEXT: fsw ft0, 32(sp)
597 ; RV64-NEXT: li a0, 32
598 ; RV64-NEXT: mv a1, sp
599 ; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
600 ; RV64-NEXT: vle32.v v8, (a1)
601 ; RV64-NEXT: addi sp, s0, -256
602 ; RV64-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
603 ; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
604 ; RV64-NEXT: fld fs0, 232(sp) # 8-byte Folded Reload
605 ; RV64-NEXT: fld fs1, 224(sp) # 8-byte Folded Reload
606 ; RV64-NEXT: fld fs2, 216(sp) # 8-byte Folded Reload
607 ; RV64-NEXT: fld fs3, 208(sp) # 8-byte Folded Reload
608 ; RV64-NEXT: fld fs4, 200(sp) # 8-byte Folded Reload
609 ; RV64-NEXT: fld fs5, 192(sp) # 8-byte Folded Reload
610 ; RV64-NEXT: fld fs6, 184(sp) # 8-byte Folded Reload
611 ; RV64-NEXT: fld fs7, 176(sp) # 8-byte Folded Reload
612 ; RV64-NEXT: fld fs8, 168(sp) # 8-byte Folded Reload
613 ; RV64-NEXT: fld fs9, 160(sp) # 8-byte Folded Reload
614 ; RV64-NEXT: fld fs10, 152(sp) # 8-byte Folded Reload
615 ; RV64-NEXT: fld fs11, 144(sp) # 8-byte Folded Reload
616 ; RV64-NEXT: addi sp, sp, 256
618 %v0 = insertelement <32 x float> poison, float %e0, i64 0
619 %v1 = insertelement <32 x float> %v0, float %e1, i64 1
620 %v2 = insertelement <32 x float> %v1, float %e2, i64 2
621 %v3 = insertelement <32 x float> %v2, float %e3, i64 3
622 %v4 = insertelement <32 x float> %v3, float %e4, i64 4
623 %v5 = insertelement <32 x float> %v4, float %e5, i64 5
624 %v6 = insertelement <32 x float> %v5, float %e6, i64 6
625 %v7 = insertelement <32 x float> %v6, float %e7, i64 7
626 %v8 = insertelement <32 x float> %v7, float %e8, i64 8
627 %v9 = insertelement <32 x float> %v8, float %e9, i64 9
628 %v10 = insertelement <32 x float> %v9, float %e10, i64 10
629 %v11 = insertelement <32 x float> %v10, float %e11, i64 11
630 %v12 = insertelement <32 x float> %v11, float %e12, i64 12
631 %v13 = insertelement <32 x float> %v12, float %e13, i64 13
632 %v14 = insertelement <32 x float> %v13, float %e14, i64 14
633 %v15 = insertelement <32 x float> %v14, float %e15, i64 15
634 %v16 = insertelement <32 x float> %v15, float %e16, i64 16
635 %v17 = insertelement <32 x float> %v16, float %e17, i64 17
636 %v18 = insertelement <32 x float> %v17, float %e18, i64 18
637 %v19 = insertelement <32 x float> %v18, float %e19, i64 19
638 %v20 = insertelement <32 x float> %v19, float %e20, i64 20
639 %v21 = insertelement <32 x float> %v20, float %e21, i64 21
640 %v22 = insertelement <32 x float> %v21, float %e22, i64 22
641 %v23 = insertelement <32 x float> %v22, float %e23, i64 23
642 %v24 = insertelement <32 x float> %v23, float %e24, i64 24
643 %v25 = insertelement <32 x float> %v24, float %e25, i64 25
644 %v26 = insertelement <32 x float> %v25, float %e26, i64 26
645 %v27 = insertelement <32 x float> %v26, float %e27, i64 27
646 %v28 = insertelement <32 x float> %v27, float %e28, i64 28
647 %v29 = insertelement <32 x float> %v28, float %e29, i64 29
648 %v30 = insertelement <32 x float> %v29, float %e30, i64 30
649 %v31 = insertelement <32 x float> %v30, float %e31, i64 31
650 ret <32 x float> %v31
653 define <8 x double> @buildvec_v8f64(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7) {
654 ; RV32-LABEL: buildvec_v8f64:
656 ; RV32-NEXT: addi sp, sp, -128
657 ; RV32-NEXT: .cfi_def_cfa_offset 128
658 ; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
659 ; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
660 ; RV32-NEXT: .cfi_offset ra, -4
661 ; RV32-NEXT: .cfi_offset s0, -8
662 ; RV32-NEXT: addi s0, sp, 128
663 ; RV32-NEXT: .cfi_def_cfa s0, 0
664 ; RV32-NEXT: andi sp, sp, -64
665 ; RV32-NEXT: fsd fa7, 56(sp)
666 ; RV32-NEXT: fsd fa6, 48(sp)
667 ; RV32-NEXT: fsd fa5, 40(sp)
668 ; RV32-NEXT: fsd fa4, 32(sp)
669 ; RV32-NEXT: fsd fa3, 24(sp)
670 ; RV32-NEXT: fsd fa2, 16(sp)
671 ; RV32-NEXT: fsd fa1, 8(sp)
672 ; RV32-NEXT: fsd fa0, 0(sp)
673 ; RV32-NEXT: mv a0, sp
674 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
675 ; RV32-NEXT: vle64.v v8, (a0)
676 ; RV32-NEXT: addi sp, s0, -128
677 ; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
678 ; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
679 ; RV32-NEXT: addi sp, sp, 128
682 ; RV64-LABEL: buildvec_v8f64:
684 ; RV64-NEXT: addi sp, sp, -128
685 ; RV64-NEXT: .cfi_def_cfa_offset 128
686 ; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
687 ; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
688 ; RV64-NEXT: .cfi_offset ra, -8
689 ; RV64-NEXT: .cfi_offset s0, -16
690 ; RV64-NEXT: addi s0, sp, 128
691 ; RV64-NEXT: .cfi_def_cfa s0, 0
692 ; RV64-NEXT: andi sp, sp, -64
693 ; RV64-NEXT: fsd fa7, 56(sp)
694 ; RV64-NEXT: fsd fa6, 48(sp)
695 ; RV64-NEXT: fsd fa5, 40(sp)
696 ; RV64-NEXT: fsd fa4, 32(sp)
697 ; RV64-NEXT: fsd fa3, 24(sp)
698 ; RV64-NEXT: fsd fa2, 16(sp)
699 ; RV64-NEXT: fsd fa1, 8(sp)
700 ; RV64-NEXT: fsd fa0, 0(sp)
701 ; RV64-NEXT: mv a0, sp
702 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
703 ; RV64-NEXT: vle64.v v8, (a0)
704 ; RV64-NEXT: addi sp, s0, -128
705 ; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
706 ; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
707 ; RV64-NEXT: addi sp, sp, 128
709 %v0 = insertelement <8 x double> poison, double %e0, i64 0
710 %v1 = insertelement <8 x double> %v0, double %e1, i64 1
711 %v2 = insertelement <8 x double> %v1, double %e2, i64 2
712 %v3 = insertelement <8 x double> %v2, double %e3, i64 3
713 %v4 = insertelement <8 x double> %v3, double %e4, i64 4
714 %v5 = insertelement <8 x double> %v4, double %e5, i64 5
715 %v6 = insertelement <8 x double> %v5, double %e6, i64 6
716 %v7 = insertelement <8 x double> %v6, double %e7, i64 7
720 define <16 x double> @buildvec_v16f64(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7, double %e8, double %e9, double %e10, double %e11, double %e12, double %e13, double %e14, double %e15) {
721 ; RV32-LABEL: buildvec_v16f64:
723 ; RV32-NEXT: addi sp, sp, -384
724 ; RV32-NEXT: .cfi_def_cfa_offset 384
725 ; RV32-NEXT: sw ra, 380(sp) # 4-byte Folded Spill
726 ; RV32-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
727 ; RV32-NEXT: .cfi_offset ra, -4
728 ; RV32-NEXT: .cfi_offset s0, -8
729 ; RV32-NEXT: addi s0, sp, 384
730 ; RV32-NEXT: .cfi_def_cfa s0, 0
731 ; RV32-NEXT: andi sp, sp, -128
732 ; RV32-NEXT: sw a0, 120(sp)
733 ; RV32-NEXT: sw a1, 124(sp)
734 ; RV32-NEXT: fld ft0, 120(sp)
735 ; RV32-NEXT: sw a2, 120(sp)
736 ; RV32-NEXT: sw a3, 124(sp)
737 ; RV32-NEXT: fld ft1, 120(sp)
738 ; RV32-NEXT: sw a4, 120(sp)
739 ; RV32-NEXT: sw a5, 124(sp)
740 ; RV32-NEXT: fld ft2, 120(sp)
741 ; RV32-NEXT: sw a6, 120(sp)
742 ; RV32-NEXT: sw a7, 124(sp)
743 ; RV32-NEXT: fld ft3, 120(sp)
744 ; RV32-NEXT: fld ft4, 24(s0)
745 ; RV32-NEXT: fld ft5, 16(s0)
746 ; RV32-NEXT: fld ft6, 8(s0)
747 ; RV32-NEXT: fld ft7, 0(s0)
748 ; RV32-NEXT: fsd ft4, 248(sp)
749 ; RV32-NEXT: fsd ft5, 240(sp)
750 ; RV32-NEXT: fsd ft6, 232(sp)
751 ; RV32-NEXT: fsd ft7, 224(sp)
752 ; RV32-NEXT: fsd fa7, 184(sp)
753 ; RV32-NEXT: fsd fa6, 176(sp)
754 ; RV32-NEXT: fsd fa5, 168(sp)
755 ; RV32-NEXT: fsd fa4, 160(sp)
756 ; RV32-NEXT: fsd fa3, 152(sp)
757 ; RV32-NEXT: fsd fa2, 144(sp)
758 ; RV32-NEXT: fsd fa1, 136(sp)
759 ; RV32-NEXT: fsd fa0, 128(sp)
760 ; RV32-NEXT: fsd ft3, 216(sp)
761 ; RV32-NEXT: fsd ft2, 208(sp)
762 ; RV32-NEXT: fsd ft1, 200(sp)
763 ; RV32-NEXT: fsd ft0, 192(sp)
764 ; RV32-NEXT: addi a0, sp, 128
765 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
766 ; RV32-NEXT: vle64.v v8, (a0)
767 ; RV32-NEXT: addi sp, s0, -384
768 ; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
769 ; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
770 ; RV32-NEXT: addi sp, sp, 384
773 ; RV64-LABEL: buildvec_v16f64:
775 ; RV64-NEXT: addi sp, sp, -256
776 ; RV64-NEXT: .cfi_def_cfa_offset 256
777 ; RV64-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
778 ; RV64-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
779 ; RV64-NEXT: .cfi_offset ra, -8
780 ; RV64-NEXT: .cfi_offset s0, -16
781 ; RV64-NEXT: addi s0, sp, 256
782 ; RV64-NEXT: .cfi_def_cfa s0, 0
783 ; RV64-NEXT: andi sp, sp, -128
784 ; RV64-NEXT: sd a7, 120(sp)
785 ; RV64-NEXT: sd a6, 112(sp)
786 ; RV64-NEXT: sd a5, 104(sp)
787 ; RV64-NEXT: sd a4, 96(sp)
788 ; RV64-NEXT: sd a3, 88(sp)
789 ; RV64-NEXT: sd a2, 80(sp)
790 ; RV64-NEXT: sd a1, 72(sp)
791 ; RV64-NEXT: sd a0, 64(sp)
792 ; RV64-NEXT: fsd fa7, 56(sp)
793 ; RV64-NEXT: fsd fa6, 48(sp)
794 ; RV64-NEXT: fsd fa5, 40(sp)
795 ; RV64-NEXT: fsd fa4, 32(sp)
796 ; RV64-NEXT: fsd fa3, 24(sp)
797 ; RV64-NEXT: fsd fa2, 16(sp)
798 ; RV64-NEXT: fsd fa1, 8(sp)
799 ; RV64-NEXT: fsd fa0, 0(sp)
800 ; RV64-NEXT: mv a0, sp
801 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
802 ; RV64-NEXT: vle64.v v8, (a0)
803 ; RV64-NEXT: addi sp, s0, -256
804 ; RV64-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
805 ; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
806 ; RV64-NEXT: addi sp, sp, 256
808 %v0 = insertelement <16 x double> poison, double %e0, i64 0
809 %v1 = insertelement <16 x double> %v0, double %e1, i64 1
810 %v2 = insertelement <16 x double> %v1, double %e2, i64 2
811 %v3 = insertelement <16 x double> %v2, double %e3, i64 3
812 %v4 = insertelement <16 x double> %v3, double %e4, i64 4
813 %v5 = insertelement <16 x double> %v4, double %e5, i64 5
814 %v6 = insertelement <16 x double> %v5, double %e6, i64 6
815 %v7 = insertelement <16 x double> %v6, double %e7, i64 7
816 %v8 = insertelement <16 x double> %v7, double %e8, i64 8
817 %v9 = insertelement <16 x double> %v8, double %e9, i64 9
818 %v10 = insertelement <16 x double> %v9, double %e10, i64 10
819 %v11 = insertelement <16 x double> %v10, double %e11, i64 11
820 %v12 = insertelement <16 x double> %v11, double %e12, i64 12
821 %v13 = insertelement <16 x double> %v12, double %e13, i64 13
822 %v14 = insertelement <16 x double> %v13, double %e14, i64 14
823 %v15 = insertelement <16 x double> %v14, double %e15, i64 15
824 ret <16 x double> %v15
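; A v32f64 result occupies two LMUL=8 register groups, so each 16-element half
; is built on the stack and loaded into v8 and v16 separately.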
827 define <32 x double> @buildvec_v32f64(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7, double %e8, double %e9, double %e10, double %e11, double %e12, double %e13, double %e14, double %e15, double %e16, double %e17, double %e18, double %e19, double %e20, double %e21, double %e22, double %e23, double %e24, double %e25, double %e26, double %e27, double %e28, double %e29, double %e30, double %e31) {
828 ; RV32-LABEL: buildvec_v32f64:
830 ; RV32-NEXT: addi sp, sp, -512
831 ; RV32-NEXT: .cfi_def_cfa_offset 512
832 ; RV32-NEXT: sw ra, 508(sp) # 4-byte Folded Spill
833 ; RV32-NEXT: sw s0, 504(sp) # 4-byte Folded Spill
834 ; RV32-NEXT: fsd fs0, 496(sp) # 8-byte Folded Spill
835 ; RV32-NEXT: fsd fs1, 488(sp) # 8-byte Folded Spill
836 ; RV32-NEXT: fsd fs2, 480(sp) # 8-byte Folded Spill
837 ; RV32-NEXT: fsd fs3, 472(sp) # 8-byte Folded Spill
838 ; RV32-NEXT: fsd fs4, 464(sp) # 8-byte Folded Spill
839 ; RV32-NEXT: fsd fs5, 456(sp) # 8-byte Folded Spill
840 ; RV32-NEXT: fsd fs6, 448(sp) # 8-byte Folded Spill
841 ; RV32-NEXT: fsd fs7, 440(sp) # 8-byte Folded Spill
842 ; RV32-NEXT: fsd fs8, 432(sp) # 8-byte Folded Spill
843 ; RV32-NEXT: fsd fs9, 424(sp) # 8-byte Folded Spill
844 ; RV32-NEXT: fsd fs10, 416(sp) # 8-byte Folded Spill
845 ; RV32-NEXT: fsd fs11, 408(sp) # 8-byte Folded Spill
846 ; RV32-NEXT: .cfi_offset ra, -4
847 ; RV32-NEXT: .cfi_offset s0, -8
848 ; RV32-NEXT: .cfi_offset fs0, -16
849 ; RV32-NEXT: .cfi_offset fs1, -24
850 ; RV32-NEXT: .cfi_offset fs2, -32
851 ; RV32-NEXT: .cfi_offset fs3, -40
852 ; RV32-NEXT: .cfi_offset fs4, -48
853 ; RV32-NEXT: .cfi_offset fs5, -56
854 ; RV32-NEXT: .cfi_offset fs6, -64
855 ; RV32-NEXT: .cfi_offset fs7, -72
856 ; RV32-NEXT: .cfi_offset fs8, -80
857 ; RV32-NEXT: .cfi_offset fs9, -88
858 ; RV32-NEXT: .cfi_offset fs10, -96
859 ; RV32-NEXT: .cfi_offset fs11, -104
860 ; RV32-NEXT: addi s0, sp, 512
861 ; RV32-NEXT: .cfi_def_cfa s0, 0
862 ; RV32-NEXT: andi sp, sp, -128
863 ; RV32-NEXT: sw a0, 120(sp)
864 ; RV32-NEXT: sw a1, 124(sp)
865 ; RV32-NEXT: fld ft0, 120(sp)
866 ; RV32-NEXT: sw a2, 120(sp)
867 ; RV32-NEXT: sw a3, 124(sp)
868 ; RV32-NEXT: fld ft1, 120(sp)
869 ; RV32-NEXT: sw a4, 120(sp)
870 ; RV32-NEXT: sw a5, 124(sp)
871 ; RV32-NEXT: fld ft2, 120(sp)
872 ; RV32-NEXT: sw a6, 120(sp)
873 ; RV32-NEXT: sw a7, 124(sp)
874 ; RV32-NEXT: fld ft3, 120(sp)
875 ; RV32-NEXT: fld ft4, 0(s0)
876 ; RV32-NEXT: fld ft5, 8(s0)
877 ; RV32-NEXT: fld ft6, 16(s0)
878 ; RV32-NEXT: fld ft7, 24(s0)
879 ; RV32-NEXT: fld ft8, 32(s0)
880 ; RV32-NEXT: fld ft9, 40(s0)
881 ; RV32-NEXT: fld ft10, 48(s0)
882 ; RV32-NEXT: fld ft11, 56(s0)
883 ; RV32-NEXT: fld fs0, 64(s0)
884 ; RV32-NEXT: fld fs1, 72(s0)
885 ; RV32-NEXT: fld fs2, 80(s0)
886 ; RV32-NEXT: fld fs3, 88(s0)
887 ; RV32-NEXT: fld fs4, 96(s0)
888 ; RV32-NEXT: fld fs5, 104(s0)
889 ; RV32-NEXT: fld fs6, 112(s0)
890 ; RV32-NEXT: fld fs7, 120(s0)
891 ; RV32-NEXT: fld fs8, 152(s0)
892 ; RV32-NEXT: fld fs9, 144(s0)
893 ; RV32-NEXT: fld fs10, 136(s0)
894 ; RV32-NEXT: fld fs11, 128(s0)
895 ; RV32-NEXT: fsd fs8, 248(sp)
896 ; RV32-NEXT: fsd fs9, 240(sp)
897 ; RV32-NEXT: fsd fs10, 232(sp)
898 ; RV32-NEXT: fsd fs11, 224(sp)
899 ; RV32-NEXT: fsd fs7, 216(sp)
900 ; RV32-NEXT: fsd fs6, 208(sp)
901 ; RV32-NEXT: fsd fs5, 200(sp)
902 ; RV32-NEXT: fsd fs4, 192(sp)
903 ; RV32-NEXT: fsd fs3, 184(sp)
904 ; RV32-NEXT: fsd fs2, 176(sp)
905 ; RV32-NEXT: fsd fs1, 168(sp)
906 ; RV32-NEXT: fsd fs0, 160(sp)
907 ; RV32-NEXT: fsd ft11, 152(sp)
908 ; RV32-NEXT: fsd ft10, 144(sp)
909 ; RV32-NEXT: fsd ft9, 136(sp)
910 ; RV32-NEXT: fsd ft8, 128(sp)
911 ; RV32-NEXT: fsd ft7, 376(sp)
912 ; RV32-NEXT: fsd ft6, 368(sp)
913 ; RV32-NEXT: fsd ft5, 360(sp)
914 ; RV32-NEXT: fsd ft4, 352(sp)
915 ; RV32-NEXT: fsd fa7, 312(sp)
916 ; RV32-NEXT: fsd fa6, 304(sp)
917 ; RV32-NEXT: fsd fa5, 296(sp)
918 ; RV32-NEXT: fsd fa4, 288(sp)
919 ; RV32-NEXT: fsd fa3, 280(sp)
920 ; RV32-NEXT: fsd fa2, 272(sp)
921 ; RV32-NEXT: fsd fa1, 264(sp)
922 ; RV32-NEXT: fsd fa0, 256(sp)
923 ; RV32-NEXT: fsd ft3, 344(sp)
924 ; RV32-NEXT: fsd ft2, 336(sp)
925 ; RV32-NEXT: fsd ft1, 328(sp)
926 ; RV32-NEXT: fsd ft0, 320(sp)
927 ; RV32-NEXT: addi a0, sp, 128
928 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
929 ; RV32-NEXT: vle64.v v16, (a0)
930 ; RV32-NEXT: addi a0, sp, 256
931 ; RV32-NEXT: vle64.v v8, (a0)
932 ; RV32-NEXT: addi sp, s0, -512
933 ; RV32-NEXT: lw ra, 508(sp) # 4-byte Folded Reload
934 ; RV32-NEXT: lw s0, 504(sp) # 4-byte Folded Reload
935 ; RV32-NEXT: fld fs0, 496(sp) # 8-byte Folded Reload
936 ; RV32-NEXT: fld fs1, 488(sp) # 8-byte Folded Reload
937 ; RV32-NEXT: fld fs2, 480(sp) # 8-byte Folded Reload
938 ; RV32-NEXT: fld fs3, 472(sp) # 8-byte Folded Reload
939 ; RV32-NEXT: fld fs4, 464(sp) # 8-byte Folded Reload
940 ; RV32-NEXT: fld fs5, 456(sp) # 8-byte Folded Reload
941 ; RV32-NEXT: fld fs6, 448(sp) # 8-byte Folded Reload
942 ; RV32-NEXT: fld fs7, 440(sp) # 8-byte Folded Reload
943 ; RV32-NEXT: fld fs8, 432(sp) # 8-byte Folded Reload
944 ; RV32-NEXT: fld fs9, 424(sp) # 8-byte Folded Reload
945 ; RV32-NEXT: fld fs10, 416(sp) # 8-byte Folded Reload
946 ; RV32-NEXT: fld fs11, 408(sp) # 8-byte Folded Reload
947 ; RV32-NEXT: addi sp, sp, 512
950 ; RV64-LABEL: buildvec_v32f64:
952 ; RV64-NEXT: addi sp, sp, -384
953 ; RV64-NEXT: .cfi_def_cfa_offset 384
954 ; RV64-NEXT: sd ra, 376(sp) # 8-byte Folded Spill
955 ; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
956 ; RV64-NEXT: fsd fs0, 360(sp) # 8-byte Folded Spill
957 ; RV64-NEXT: fsd fs1, 352(sp) # 8-byte Folded Spill
958 ; RV64-NEXT: fsd fs2, 344(sp) # 8-byte Folded Spill
959 ; RV64-NEXT: fsd fs3, 336(sp) # 8-byte Folded Spill
960 ; RV64-NEXT: .cfi_offset ra, -8
961 ; RV64-NEXT: .cfi_offset s0, -16
962 ; RV64-NEXT: .cfi_offset fs0, -24
963 ; RV64-NEXT: .cfi_offset fs1, -32
964 ; RV64-NEXT: .cfi_offset fs2, -40
965 ; RV64-NEXT: .cfi_offset fs3, -48
966 ; RV64-NEXT: addi s0, sp, 384
967 ; RV64-NEXT: .cfi_def_cfa s0, 0
968 ; RV64-NEXT: andi sp, sp, -128
969 ; RV64-NEXT: fld ft0, 0(s0)
970 ; RV64-NEXT: fld ft1, 8(s0)
971 ; RV64-NEXT: fld ft2, 16(s0)
972 ; RV64-NEXT: fld ft3, 24(s0)
973 ; RV64-NEXT: fld ft4, 32(s0)
974 ; RV64-NEXT: fld ft5, 40(s0)
975 ; RV64-NEXT: fld ft6, 48(s0)
976 ; RV64-NEXT: fld ft7, 56(s0)
977 ; RV64-NEXT: fld ft8, 64(s0)
978 ; RV64-NEXT: fld ft9, 72(s0)
979 ; RV64-NEXT: fld ft10, 80(s0)
980 ; RV64-NEXT: fld ft11, 88(s0)
981 ; RV64-NEXT: fld fs0, 96(s0)
982 ; RV64-NEXT: fld fs1, 104(s0)
983 ; RV64-NEXT: fld fs2, 112(s0)
984 ; RV64-NEXT: fld fs3, 120(s0)
985 ; RV64-NEXT: sd a7, 248(sp)
986 ; RV64-NEXT: sd a6, 240(sp)
987 ; RV64-NEXT: sd a5, 232(sp)
988 ; RV64-NEXT: sd a4, 224(sp)
989 ; RV64-NEXT: sd a3, 216(sp)
990 ; RV64-NEXT: sd a2, 208(sp)
991 ; RV64-NEXT: sd a1, 200(sp)
992 ; RV64-NEXT: sd a0, 192(sp)
993 ; RV64-NEXT: fsd fa7, 184(sp)
994 ; RV64-NEXT: fsd fa6, 176(sp)
995 ; RV64-NEXT: fsd fa5, 168(sp)
996 ; RV64-NEXT: fsd fa4, 160(sp)
997 ; RV64-NEXT: fsd fa3, 152(sp)
998 ; RV64-NEXT: fsd fa2, 144(sp)
999 ; RV64-NEXT: fsd fa1, 136(sp)
1000 ; RV64-NEXT: fsd fa0, 128(sp)
1001 ; RV64-NEXT: fsd fs3, 120(sp)
1002 ; RV64-NEXT: fsd fs2, 112(sp)
1003 ; RV64-NEXT: fsd fs1, 104(sp)
1004 ; RV64-NEXT: fsd fs0, 96(sp)
1005 ; RV64-NEXT: fsd ft11, 88(sp)
1006 ; RV64-NEXT: fsd ft10, 80(sp)
1007 ; RV64-NEXT: fsd ft9, 72(sp)
1008 ; RV64-NEXT: fsd ft8, 64(sp)
1009 ; RV64-NEXT: fsd ft7, 56(sp)
1010 ; RV64-NEXT: fsd ft6, 48(sp)
1011 ; RV64-NEXT: fsd ft5, 40(sp)
1012 ; RV64-NEXT: fsd ft4, 32(sp)
1013 ; RV64-NEXT: fsd ft3, 24(sp)
1014 ; RV64-NEXT: fsd ft2, 16(sp)
1015 ; RV64-NEXT: fsd ft1, 8(sp)
1016 ; RV64-NEXT: fsd ft0, 0(sp)
1017 ; RV64-NEXT: addi a0, sp, 128
1018 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1019 ; RV64-NEXT: vle64.v v8, (a0)
1020 ; RV64-NEXT: mv a0, sp
1021 ; RV64-NEXT: vle64.v v16, (a0)
1022 ; RV64-NEXT: addi sp, s0, -384
1023 ; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
1024 ; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
1025 ; RV64-NEXT: fld fs0, 360(sp) # 8-byte Folded Reload
1026 ; RV64-NEXT: fld fs1, 352(sp) # 8-byte Folded Reload
1027 ; RV64-NEXT: fld fs2, 344(sp) # 8-byte Folded Reload
1028 ; RV64-NEXT: fld fs3, 336(sp) # 8-byte Folded Reload
1029 ; RV64-NEXT: addi sp, sp, 384
1031 %v0 = insertelement <32 x double> poison, double %e0, i64 0
1032 %v1 = insertelement <32 x double> %v0, double %e1, i64 1
1033 %v2 = insertelement <32 x double> %v1, double %e2, i64 2
1034 %v3 = insertelement <32 x double> %v2, double %e3, i64 3
1035 %v4 = insertelement <32 x double> %v3, double %e4, i64 4
1036 %v5 = insertelement <32 x double> %v4, double %e5, i64 5
1037 %v6 = insertelement <32 x double> %v5, double %e6, i64 6
1038 %v7 = insertelement <32 x double> %v6, double %e7, i64 7
1039 %v8 = insertelement <32 x double> %v7, double %e8, i64 8
1040 %v9 = insertelement <32 x double> %v8, double %e9, i64 9
1041 %v10 = insertelement <32 x double> %v9, double %e10, i64 10
1042 %v11 = insertelement <32 x double> %v10, double %e11, i64 11
1043 %v12 = insertelement <32 x double> %v11, double %e12, i64 12
1044 %v13 = insertelement <32 x double> %v12, double %e13, i64 13
1045 %v14 = insertelement <32 x double> %v13, double %e14, i64 14
1046 %v15 = insertelement <32 x double> %v14, double %e15, i64 15
1047 %v16 = insertelement <32 x double> %v15, double %e16, i64 16
1048 %v17 = insertelement <32 x double> %v16, double %e17, i64 17
1049 %v18 = insertelement <32 x double> %v17, double %e18, i64 18
1050 %v19 = insertelement <32 x double> %v18, double %e19, i64 19
1051 %v20 = insertelement <32 x double> %v19, double %e20, i64 20
1052 %v21 = insertelement <32 x double> %v20, double %e21, i64 21
1053 %v22 = insertelement <32 x double> %v21, double %e22, i64 22
1054 %v23 = insertelement <32 x double> %v22, double %e23, i64 23
1055 %v24 = insertelement <32 x double> %v23, double %e24, i64 24
1056 %v25 = insertelement <32 x double> %v24, double %e25, i64 25
1057 %v26 = insertelement <32 x double> %v25, double %e26, i64 26
1058 %v27 = insertelement <32 x double> %v26, double %e27, i64 27
1059 %v28 = insertelement <32 x double> %v27, double %e28, i64 28
1060 %v29 = insertelement <32 x double> %v28, double %e29, i64 29
1061 %v30 = insertelement <32 x double> %v29, double %e30, i64 30
1062 %v31 = insertelement <32 x double> %v30, double %e31, i64 31
1063 ret <32 x double> %v31
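; With an exact VLEN of 128 (vscale_range(2,2)), each 2 x double destination
; register can be built directly with vfmv.v.f/vfslide1down.vf (or a zero-stride
; vlse64 splat for stack-passed operands) instead of one big stack temporary.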
1066 define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7, double %e8, double %e9, double %e10, double %e11, double %e12, double %e13, double %e14, double %e15, double %e16, double %e17, double %e18, double %e19, double %e20, double %e21, double %e22, double %e23, double %e24, double %e25, double %e26, double %e27, double %e28, double %e29, double %e30, double %e31) vscale_range(2,2) {
1067 ; RV32-LABEL: buildvec_v32f64_exact_vlen:
1069 ; RV32-NEXT: addi sp, sp, -32
1070 ; RV32-NEXT: .cfi_def_cfa_offset 32
1071 ; RV32-NEXT: fsd fs0, 24(sp) # 8-byte Folded Spill
1072 ; RV32-NEXT: fsd fs1, 16(sp) # 8-byte Folded Spill
1073 ; RV32-NEXT: .cfi_offset fs0, -8
1074 ; RV32-NEXT: .cfi_offset fs1, -16
1075 ; RV32-NEXT: sw a6, 8(sp)
1076 ; RV32-NEXT: sw a7, 12(sp)
1077 ; RV32-NEXT: fld ft4, 8(sp)
1078 ; RV32-NEXT: sw a4, 8(sp)
1079 ; RV32-NEXT: sw a5, 12(sp)
1080 ; RV32-NEXT: fld ft5, 8(sp)
1081 ; RV32-NEXT: sw a2, 8(sp)
1082 ; RV32-NEXT: sw a3, 12(sp)
1083 ; RV32-NEXT: fld ft6, 8(sp)
1084 ; RV32-NEXT: sw a0, 8(sp)
1085 ; RV32-NEXT: sw a1, 12(sp)
1086 ; RV32-NEXT: fld ft7, 8(sp)
1087 ; RV32-NEXT: fld ft0, 184(sp)
1088 ; RV32-NEXT: fld ft1, 168(sp)
1089 ; RV32-NEXT: fld ft2, 152(sp)
1090 ; RV32-NEXT: fld ft3, 136(sp)
1091 ; RV32-NEXT: fld ft8, 120(sp)
1092 ; RV32-NEXT: fld ft9, 104(sp)
1093 ; RV32-NEXT: fld ft10, 72(sp)
1094 ; RV32-NEXT: fld ft11, 88(sp)
1095 ; RV32-NEXT: fld fs0, 56(sp)
1096 ; RV32-NEXT: fld fs1, 40(sp)
1097 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1098 ; RV32-NEXT: vfmv.v.f v8, ft7
1099 ; RV32-NEXT: vfslide1down.vf v12, v8, ft6
1100 ; RV32-NEXT: vfmv.v.f v8, fa2
1101 ; RV32-NEXT: vfslide1down.vf v9, v8, fa3
1102 ; RV32-NEXT: vfmv.v.f v8, fa0
1103 ; RV32-NEXT: vfslide1down.vf v8, v8, fa1
1104 ; RV32-NEXT: vfmv.v.f v10, fa4
1105 ; RV32-NEXT: vfslide1down.vf v10, v10, fa5
1106 ; RV32-NEXT: vfmv.v.f v11, fa6
1107 ; RV32-NEXT: vfslide1down.vf v11, v11, fa7
1108 ; RV32-NEXT: addi a0, sp, 32
1109 ; RV32-NEXT: vlse64.v v14, (a0), zero
1110 ; RV32-NEXT: addi a0, sp, 48
1111 ; RV32-NEXT: vlse64.v v15, (a0), zero
1112 ; RV32-NEXT: vfmv.v.f v13, ft5
1113 ; RV32-NEXT: vfslide1down.vf v13, v13, ft4
1114 ; RV32-NEXT: vfslide1down.vf v14, v14, fs1
1115 ; RV32-NEXT: vfslide1down.vf v15, v15, fs0
1116 ; RV32-NEXT: addi a0, sp, 80
1117 ; RV32-NEXT: vlse64.v v16, (a0), zero
1118 ; RV32-NEXT: addi a0, sp, 64
1119 ; RV32-NEXT: vlse64.v v18, (a0), zero
1120 ; RV32-NEXT: addi a0, sp, 96
1121 ; RV32-NEXT: vlse64.v v19, (a0), zero
1122 ; RV32-NEXT: addi a0, sp, 112
1123 ; RV32-NEXT: vlse64.v v20, (a0), zero
1124 ; RV32-NEXT: vfslide1down.vf v17, v16, ft11
1125 ; RV32-NEXT: vfslide1down.vf v16, v18, ft10
1126 ; RV32-NEXT: vfslide1down.vf v18, v19, ft9
1127 ; RV32-NEXT: vfslide1down.vf v19, v20, ft8
1128 ; RV32-NEXT: addi a0, sp, 128
1129 ; RV32-NEXT: vlse64.v v20, (a0), zero
1130 ; RV32-NEXT: addi a0, sp, 144
1131 ; RV32-NEXT: vlse64.v v21, (a0), zero
1132 ; RV32-NEXT: addi a0, sp, 160
1133 ; RV32-NEXT: vlse64.v v22, (a0), zero
1134 ; RV32-NEXT: addi a0, sp, 176
1135 ; RV32-NEXT: vlse64.v v23, (a0), zero
1136 ; RV32-NEXT: vfslide1down.vf v20, v20, ft3
1137 ; RV32-NEXT: vfslide1down.vf v21, v21, ft2
1138 ; RV32-NEXT: vfslide1down.vf v22, v22, ft1
1139 ; RV32-NEXT: vfslide1down.vf v23, v23, ft0
1140 ; RV32-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload
1141 ; RV32-NEXT: fld fs1, 16(sp) # 8-byte Folded Reload
1142 ; RV32-NEXT: addi sp, sp, 32
1145 ; RV64-LABEL: buildvec_v32f64_exact_vlen:
1147 ; RV64-NEXT: addi sp, sp, -32
1148 ; RV64-NEXT: .cfi_def_cfa_offset 32
1149 ; RV64-NEXT: fsd fs0, 24(sp) # 8-byte Folded Spill
1150 ; RV64-NEXT: fsd fs1, 16(sp) # 8-byte Folded Spill
1151 ; RV64-NEXT: fsd fs2, 8(sp) # 8-byte Folded Spill
1152 ; RV64-NEXT: fsd fs3, 0(sp) # 8-byte Folded Spill
1153 ; RV64-NEXT: .cfi_offset fs0, -8
1154 ; RV64-NEXT: .cfi_offset fs1, -16
1155 ; RV64-NEXT: .cfi_offset fs2, -24
1156 ; RV64-NEXT: .cfi_offset fs3, -32
1157 ; RV64-NEXT: fmv.d.x ft4, a7
1158 ; RV64-NEXT: fmv.d.x ft5, a6
1159 ; RV64-NEXT: fmv.d.x ft6, a5
1160 ; RV64-NEXT: fmv.d.x ft7, a4
1161 ; RV64-NEXT: fmv.d.x ft8, a3
1162 ; RV64-NEXT: fmv.d.x ft9, a2
1163 ; RV64-NEXT: fmv.d.x ft10, a1
1164 ; RV64-NEXT: fmv.d.x ft11, a0
1165 ; RV64-NEXT: fld ft0, 152(sp)
1166 ; RV64-NEXT: fld ft1, 136(sp)
1167 ; RV64-NEXT: fld ft2, 120(sp)
1168 ; RV64-NEXT: fld ft3, 104(sp)
1169 ; RV64-NEXT: fld fs0, 88(sp)
1170 ; RV64-NEXT: fld fs1, 72(sp)
1171 ; RV64-NEXT: fld fs2, 40(sp)
1172 ; RV64-NEXT: fld fs3, 56(sp)
1173 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1174 ; RV64-NEXT: vfmv.v.f v8, fa2
1175 ; RV64-NEXT: vfslide1down.vf v9, v8, fa3
1176 ; RV64-NEXT: vfmv.v.f v8, fa0
1177 ; RV64-NEXT: vfslide1down.vf v8, v8, fa1
1178 ; RV64-NEXT: vfmv.v.f v10, fa4
1179 ; RV64-NEXT: vfslide1down.vf v10, v10, fa5
1180 ; RV64-NEXT: vfmv.v.f v11, fa6
1181 ; RV64-NEXT: vfslide1down.vf v11, v11, fa7
1182 ; RV64-NEXT: vfmv.v.f v12, ft11
1183 ; RV64-NEXT: vfslide1down.vf v12, v12, ft10
1184 ; RV64-NEXT: vfmv.v.f v13, ft9
1185 ; RV64-NEXT: vfslide1down.vf v13, v13, ft8
1186 ; RV64-NEXT: vfmv.v.f v14, ft7
1187 ; RV64-NEXT: vfslide1down.vf v14, v14, ft6
1188 ; RV64-NEXT: vfmv.v.f v15, ft5
1189 ; RV64-NEXT: vfslide1down.vf v15, v15, ft4
1190 ; RV64-NEXT: addi a0, sp, 48
1191 ; RV64-NEXT: vlse64.v v16, (a0), zero
1192 ; RV64-NEXT: addi a0, sp, 32
1193 ; RV64-NEXT: vlse64.v v18, (a0), zero
1194 ; RV64-NEXT: addi a0, sp, 64
1195 ; RV64-NEXT: vlse64.v v19, (a0), zero
1196 ; RV64-NEXT: addi a0, sp, 80
1197 ; RV64-NEXT: vlse64.v v20, (a0), zero
1198 ; RV64-NEXT: vfslide1down.vf v17, v16, fs3
1199 ; RV64-NEXT: vfslide1down.vf v16, v18, fs2
1200 ; RV64-NEXT: vfslide1down.vf v18, v19, fs1
1201 ; RV64-NEXT: vfslide1down.vf v19, v20, fs0
1202 ; RV64-NEXT: addi a0, sp, 96
1203 ; RV64-NEXT: vlse64.v v20, (a0), zero
1204 ; RV64-NEXT: addi a0, sp, 112
1205 ; RV64-NEXT: vlse64.v v21, (a0), zero
1206 ; RV64-NEXT: addi a0, sp, 128
1207 ; RV64-NEXT: vlse64.v v22, (a0), zero
1208 ; RV64-NEXT: addi a0, sp, 144
1209 ; RV64-NEXT: vlse64.v v23, (a0), zero
1210 ; RV64-NEXT: vfslide1down.vf v20, v20, ft3
1211 ; RV64-NEXT: vfslide1down.vf v21, v21, ft2
1212 ; RV64-NEXT: vfslide1down.vf v22, v22, ft1
1213 ; RV64-NEXT: vfslide1down.vf v23, v23, ft0
1214 ; RV64-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload
1215 ; RV64-NEXT: fld fs1, 16(sp) # 8-byte Folded Reload
1216 ; RV64-NEXT: fld fs2, 8(sp) # 8-byte Folded Reload
1217 ; RV64-NEXT: fld fs3, 0(sp) # 8-byte Folded Reload
1218 ; RV64-NEXT: addi sp, sp, 32
1220 %v0 = insertelement <32 x double> poison, double %e0, i64 0
1221 %v1 = insertelement <32 x double> %v0, double %e1, i64 1
1222 %v2 = insertelement <32 x double> %v1, double %e2, i64 2
1223 %v3 = insertelement <32 x double> %v2, double %e3, i64 3
1224 %v4 = insertelement <32 x double> %v3, double %e4, i64 4
1225 %v5 = insertelement <32 x double> %v4, double %e5, i64 5
1226 %v6 = insertelement <32 x double> %v5, double %e6, i64 6
1227 %v7 = insertelement <32 x double> %v6, double %e7, i64 7
1228 %v8 = insertelement <32 x double> %v7, double %e8, i64 8
1229 %v9 = insertelement <32 x double> %v8, double %e9, i64 9
1230 %v10 = insertelement <32 x double> %v9, double %e10, i64 10
1231 %v11 = insertelement <32 x double> %v10, double %e11, i64 11
1232 %v12 = insertelement <32 x double> %v11, double %e12, i64 12
1233 %v13 = insertelement <32 x double> %v12, double %e13, i64 13
1234 %v14 = insertelement <32 x double> %v13, double %e14, i64 14
1235 %v15 = insertelement <32 x double> %v14, double %e15, i64 15
1236 %v16 = insertelement <32 x double> %v15, double %e16, i64 16
1237 %v17 = insertelement <32 x double> %v16, double %e17, i64 17
1238 %v18 = insertelement <32 x double> %v17, double %e18, i64 18
1239 %v19 = insertelement <32 x double> %v18, double %e19, i64 19
1240 %v20 = insertelement <32 x double> %v19, double %e20, i64 20
1241 %v21 = insertelement <32 x double> %v20, double %e21, i64 21
1242 %v22 = insertelement <32 x double> %v21, double %e22, i64 22
1243 %v23 = insertelement <32 x double> %v22, double %e23, i64 23
1244 %v24 = insertelement <32 x double> %v23, double %e24, i64 24
1245 %v25 = insertelement <32 x double> %v24, double %e25, i64 25
1246 %v26 = insertelement <32 x double> %v25, double %e26, i64 26
1247 %v27 = insertelement <32 x double> %v26, double %e27, i64 27
1248 %v28 = insertelement <32 x double> %v27, double %e28, i64 28
1249 %v29 = insertelement <32 x double> %v28, double %e29, i64 29
1250 %v30 = insertelement <32 x double> %v29, double %e30, i64 30
1251 %v31 = insertelement <32 x double> %v30, double %e31, i64 31
1252 ret <32 x double> %v31
1255 ; FIXME: These constants have enough sign bits that we could use vmv.v.x/i and
1256 ; vsext, but we don't support this for FP yet.
1257 define <2 x float> @signbits() {
1258 ; CHECK-LABEL: signbits:
1259 ; CHECK: # %bb.0: # %entry
1260 ; CHECK-NEXT: lui a0, %hi(.LCPI25_0)
1261 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI25_0)
1262 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1263 ; CHECK-NEXT: vle32.v v8, (a0)
1266 ret <2 x float> <float 0x36A0000000000000, float 0.000000e+00>
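; Constant FP vectors whose elements form a small integer series (with an
; optional addend or stride) should be generated with vid.v, an integer add,
; and vfcvt.f.x.v; non-integral steps such as 0.5 still come from the constant pool.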
1269 define <2 x half> @vid_v2f16() {
1270 ; CHECK-LABEL: vid_v2f16:
1272 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1273 ; CHECK-NEXT: vid.v v8
1274 ; CHECK-NEXT: vfcvt.f.x.v v8, v8
1276 ret <2 x half> <half 0.0, half 1.0>
1279 define <2 x half> @vid_addend1_v2f16() {
1280 ; CHECK-LABEL: vid_addend1_v2f16:
1282 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1283 ; CHECK-NEXT: vid.v v8
1284 ; CHECK-NEXT: vadd.vi v8, v8, 1
1285 ; CHECK-NEXT: vfcvt.f.x.v v8, v8
1287 ret <2 x half> <half 1.0, half 2.0>
1290 define <2 x half> @vid_denominator2_v2f16() {
1291 ; CHECK-LABEL: vid_denominator2_v2f16:
1293 ; CHECK-NEXT: lui a0, %hi(.LCPI28_0)
1294 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI28_0)
1295 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1296 ; CHECK-NEXT: vle16.v v8, (a0)
1298 ret <2 x half> <half 0.5, half 1.0>
1301 define <2 x half> @vid_step2_v2f16() {
1302 ; CHECK-LABEL: vid_step2_v2f16:
1304 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1305 ; CHECK-NEXT: vid.v v8
1306 ; CHECK-NEXT: vadd.vv v8, v8, v8
1307 ; CHECK-NEXT: vfcvt.f.x.v v8, v8
1309 ret <2 x half> <half 0.0, half 2.0>
1312 define <2 x float> @vid_v2f32() {
1313 ; CHECK-LABEL: vid_v2f32:
1315 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1316 ; CHECK-NEXT: vid.v v8
1317 ; CHECK-NEXT: vfcvt.f.x.v v8, v8
1319 ret <2 x float> <float 0.0, float 1.0>
1322 define <2 x float> @vid_addend1_v2f32() {
1323 ; CHECK-LABEL: vid_addend1_v2f32:
1325 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1326 ; CHECK-NEXT: vid.v v8
1327 ; CHECK-NEXT: vadd.vi v8, v8, 1
1328 ; CHECK-NEXT: vfcvt.f.x.v v8, v8
1330 ret <2 x float> <float 1.0, float 2.0>
1333 define <2 x float> @vid_denominator2_v2f32() {
1334 ; CHECK-LABEL: vid_denominator2_v2f32:
1336 ; CHECK-NEXT: lui a0, %hi(.LCPI32_0)
1337 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI32_0)
1338 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1339 ; CHECK-NEXT: vle32.v v8, (a0)
1341 ret <2 x float> <float 0.5, float 1.0>
1344 define <2 x float> @vid_step2_v2f32() {
1345 ; CHECK-LABEL: vid_step2_v2f32:
1347 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1348 ; CHECK-NEXT: vid.v v8
1349 ; CHECK-NEXT: vadd.vv v8, v8, v8
1350 ; CHECK-NEXT: vfcvt.f.x.v v8, v8
1352 ret <2 x float> <float 0.0, float 2.0>
1355 define <2 x double> @vid_v2f64() {
1356 ; CHECK-LABEL: vid_v2f64:
1358 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1359 ; CHECK-NEXT: vid.v v8
1360 ; CHECK-NEXT: vfcvt.f.x.v v8, v8
1362 ret <2 x double> <double 0.0, double 1.0>
1365 define <2 x double> @vid_addend1_v2f64() {
1366 ; CHECK-LABEL: vid_addend1_v2f64:
1368 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1369 ; CHECK-NEXT: vid.v v8
1370 ; CHECK-NEXT: vadd.vi v8, v8, 1
1371 ; CHECK-NEXT: vfcvt.f.x.v v8, v8
1373 ret <2 x double> <double 1.0, double 2.0>
1376 define <2 x double> @vid_denominator2_v2f64() {
1377 ; CHECK-LABEL: vid_denominator2_v2f64:
1379 ; CHECK-NEXT: lui a0, %hi(.LCPI36_0)
1380 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI36_0)
1381 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1382 ; CHECK-NEXT: vle64.v v8, (a0)
1384 ret <2 x double> <double 0.5, double 1.0>
1387 define <2 x double> @vid_step2_v2f64() {
1388 ; CHECK-LABEL: vid_step2_v2f64:
1390 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1391 ; CHECK-NEXT: vid.v v8
1392 ; CHECK-NEXT: vadd.vv v8, v8, v8
1393 ; CHECK-NEXT: vfcvt.f.x.v v8, v8
1395 ret <2 x double> <double 0.0, double 2.0>