; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

; Tests that a floating-point build_vector doesn't try to generate a VID
; sequence.
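; Note: the values {0.0, 4.0, 0.0, 2.0} are not an affine function of the lane
; index, so no vid.v-based sequence can produce them; the expected lowering
; below is a plain constant-pool load (lui/addi of .LCPI0_0 plus vle32.v).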
define void @buildvec_no_vid_v4f32(<4 x float>* %x) {
; CHECK-LABEL: buildvec_no_vid_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI0_0)
; CHECK-NEXT: addi a1, a1, %lo(.LCPI0_0)
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  store <4 x float> <float 0.0, float 4.0, float 0.0, float 2.0>, <4 x float>* %x
  ret void
}

; Not all BUILD_VECTORs are successfully lowered by the backend: some are
; expanded into scalarized stack stores. However, this may result in an
; infinite loop in the DAGCombiner which tries to recombine those stores into a
; BUILD_VECTOR followed by a vector store. The BUILD_VECTOR is then expanded
; and the loop begins.
; Until all BUILD_VECTORs are lowered, we disable store-combining after
; legalization for fixed-length vectors.
; This test uses a trick with a shufflevector which can't be lowered to a
; SHUFFLE_VECTOR node; the mask is shorter than the source vectors and the
; shuffle indices aren't located within the same 4-element subvector, so it is
; expanded to 4 EXTRACT_VECTOR_ELTs and a BUILD_VECTOR. This then triggers the
; infinite loop described above.
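; Note on the expected sequence below: vid.v multiplied by 7 gives i16 indices
; {0, 7, 14, 21, ...}; the first vrgatherei16 pulls %x[0] and %x[7] into lanes
; 0-1 (the out-of-range indices in the upper lanes are overwritten). Adding -14
; and gathering from %y under mask 0b1100 fills lanes 2-3 with %y[0] and %y[7],
; i.e. elements 8 and 15 of the concatenated source, matching <0, 7, 8, 15>.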
define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x, <8 x float> %y) optsize {
; CHECK-LABEL: hang_when_merging_stores_after_legalization:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vid.v v12
; CHECK-NEXT: li a0, 7
; CHECK-NEXT: vmul.vx v14, v12, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vrgatherei16.vv v12, v8, v14
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vadd.vi v8, v14, -14
; CHECK-NEXT: vmv.v.i v0, 12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vrgatherei16.vv v12, v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v8, v12
; CHECK-NEXT: ret
  %z = shufflevector <8 x float> %x, <8 x float> %y, <4 x i32> <i32 0, i32 7, i32 8, i32 15>
  ret <4 x float> %z
}

define void @buildvec_dominant0_v2f32(<2 x float>* %x) {
; CHECK-LABEL: buildvec_dominant0_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vfcvt.f.x.v v8, v8
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  store <2 x float> <float 0.0, float 1.0>, <2 x float>* %x
  ret void
}

; We don't want to lower this to the insertion of two scalar elements as above,
; as each would require its own load from the constant pool.
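; Note: {1.0, 2.0} is instead built entirely in registers in the checks below:
; vid.v gives {0, 1}, vadd.vi adds 1, and vfcvt.f.x.v converts the integer
; sequence to floating point, avoiding any constant-pool load.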
define void @buildvec_dominant1_v2f32(<2 x float>* %x) {
; CHECK-LABEL: buildvec_dominant1_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: vfcvt.f.x.v v8, v8
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  store <2 x float> <float 1.0, float 2.0>, <2 x float>* %x
  ret void
}

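; Note: in the checks below, lui a1, 262144 materializes 0x40000000, the bit
; pattern of 2.0f. The dominant element is splatted with vmv.v.x and the single
; 0.0 is slid into lane 2 with vmv.s.x + vslideup.vi.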
define void @buildvec_dominant0_v4f32(<4 x float>* %x) {
; CHECK-LABEL: buildvec_dominant0_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 262144
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  store <4 x float> <float 2.0, float 2.0, float 0.0, float 2.0>, <4 x float>* %x
  ret void
}

define void @buildvec_dominant1_v4f32(<4 x float>* %x, float %f) {
; CHECK-LABEL: buildvec_dominant1_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  %v0 = insertelement <4 x float> poison, float %f, i32 0
  %v1 = insertelement <4 x float> %v0, float 0.0, i32 1
  %v2 = insertelement <4 x float> %v1, float %f, i32 2
  %v3 = insertelement <4 x float> %v2, float %f, i32 3
  store <4 x float> %v3, <4 x float>* %x
  ret void
}

define void @buildvec_dominant2_v4f32(<4 x float>* %x, float %f) {
; CHECK-LABEL: buildvec_dominant2_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 262144
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a1
; CHECK-NEXT: vfmv.v.f v9, fa0
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: ret
  %v0 = insertelement <4 x float> poison, float %f, i32 0
  %v1 = insertelement <4 x float> %v0, float 2.0, i32 1
  %v2 = insertelement <4 x float> %v1, float %f, i32 2
  %v3 = insertelement <4 x float> %v2, float %f, i32 3
  store <4 x float> %v3, <4 x float>* %x
  ret void
}

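; Note: in the checks below, vmv.v.i v0, 6 sets the mask to 0b0110 (lanes 1 and
; 2), so vmerge.vxm overwrites exactly those lanes of the %f splat with
; a1 = 0x40000000 (2.0f), leaving lanes 0 and 3 as %f.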
define void @buildvec_merge0_v4f32(<4 x float>* %x, float %f) {
; CHECK-LABEL: buildvec_merge0_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vmv.v.i v0, 6
; CHECK-NEXT: lui a1, 262144
; CHECK-NEXT: vmerge.vxm v8, v8, a1, v0
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  %v0 = insertelement <4 x float> poison, float %f, i32 0
  %v1 = insertelement <4 x float> %v0, float 2.0, i32 1
  %v2 = insertelement <4 x float> %v1, float 2.0, i32 2
  %v3 = insertelement <4 x float> %v2, float %f, i32 3
  store <4 x float> %v3, <4 x float>* %x
  ret void
}

define <4 x half> @splat_c3_v4f16(<4 x half> %v) {
; CHECK-LABEL: splat_c3_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vrgather.vi v9, v8, 3
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %x = extractelement <4 x half> %v, i32 3
  %ins = insertelement <4 x half> poison, half %x, i32 0
  %splat = shufflevector <4 x half> %ins, <4 x half> poison, <4 x i32> zeroinitializer
  ret <4 x half> %splat
}

define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) {
; CHECK-LABEL: splat_idx_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vrgather.vx v9, v8, a0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %x = extractelement <4 x half> %v, i64 %idx
  %ins = insertelement <4 x half> poison, half %x, i32 0
  %splat = shufflevector <4 x half> %ins, <4 x half> poison, <4 x i32> zeroinitializer
  ret <4 x half> %splat
}

define <8 x float> @splat_c5_v8f32(<8 x float> %v) {
; CHECK-LABEL: splat_c5_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vrgather.vi v10, v8, 5
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
  %x = extractelement <8 x float> %v, i32 5
  %ins = insertelement <8 x float> poison, float %x, i32 0
  %splat = shufflevector <8 x float> %ins, <8 x float> poison, <8 x i32> zeroinitializer
  ret <8 x float> %splat
}

define <8 x float> @splat_idx_v8f32(<8 x float> %v, i64 %idx) {
; CHECK-LABEL: splat_idx_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vrgather.vx v10, v8, a0
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
  %x = extractelement <8 x float> %v, i64 %idx
  %ins = insertelement <8 x float> poison, float %x, i32 0
  %splat = shufflevector <8 x float> %ins, <8 x float> poison, <8 x i32> zeroinitializer
  ret <8 x float> %splat
}

; Test that we pull the vlse of the constant pool out of the loop.
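; Note: with the lowering checked below the splat is materialized with
; lui a2, 263168 (0x40400000, the bit pattern of 3.0f) and vmv.v.x rather than
; a vlse, and it is hoisted out of the loop: only the vse32.v store and the
; counter/pointer updates remain inside .LBB12_1.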
define dso_local void @splat_load_licm(float* %0) {
; RV32-LABEL: splat_load_licm:
; RV32: # %bb.0:
; RV32-NEXT: li a1, 1024
; RV32-NEXT: lui a2, 263168
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v8, a2
; RV32-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: addi a1, a1, -4
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: bnez a1, .LBB12_1
; RV32-NEXT: # %bb.2:
; RV32-NEXT: ret
;
; RV64-LABEL: splat_load_licm:
; RV64: # %bb.0:
; RV64-NEXT: li a1, 1024
; RV64-NEXT: lui a2, 263168
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.v.x v8, a2
; RV64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: addiw a1, a1, -4
; RV64-NEXT: addi a0, a0, 16
; RV64-NEXT: bnez a1, .LBB12_1
; RV64-NEXT: # %bb.2:
; RV64-NEXT: ret
  br label %2

2:                                                ; preds = %2, %1
  %3 = phi i32 [ 0, %1 ], [ %6, %2 ]
  %4 = getelementptr inbounds float, float* %0, i32 %3
  %5 = bitcast float* %4 to <4 x float>*
  store <4 x float> <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>, <4 x float>* %5, align 4
  %6 = add nuw i32 %3, 4
  %7 = icmp eq i32 %6, 1024
  br i1 %7, label %8, label %2

8:                                                ; preds = %2
  ret void
}

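; The buildvec_* tests below take every element as a scalar FP argument. Each
; build_vector is expected to lower to one vfslide1down.vf per element: each
; slide shifts the existing lanes down by one and inserts the new scalar in the
; last lane, so after one slide per element the first scalar ends up in lane 0.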
define <2 x half> @buildvec_v2f16(half %a, half %b) {
; CHECK-LABEL: buildvec_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: ret
  %v1 = insertelement <2 x half> poison, half %a, i64 0
  %v2 = insertelement <2 x half> %v1, half %b, i64 1
  ret <2 x half> %v2
}

define <2 x float> @buildvec_v2f32(float %a, float %b) {
; CHECK-LABEL: buildvec_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: ret
  %v1 = insertelement <2 x float> poison, float %a, i64 0
  %v2 = insertelement <2 x float> %v1, float %b, i64 1
  ret <2 x float> %v2
}

define <2 x double> @buildvec_v2f64(double %a, double %b) {
; CHECK-LABEL: buildvec_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: ret
  %v1 = insertelement <2 x double> poison, double %a, i64 0
  %v2 = insertelement <2 x double> %v1, double %b, i64 1
  ret <2 x double> %v2
}

define <2 x double> @buildvec_v2f64_b(double %a, double %b) {
; CHECK-LABEL: buildvec_v2f64_b:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: ret
  %v1 = insertelement <2 x double> poison, double %b, i64 1
  %v2 = insertelement <2 x double> %v1, double %a, i64 0
  ret <2 x double> %v2
}

define <4 x float> @buildvec_v4f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: buildvec_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
; CHECK-NEXT: ret
  %v1 = insertelement <4 x float> poison, float %a, i64 0
  %v2 = insertelement <4 x float> %v1, float %b, i64 1
  %v3 = insertelement <4 x float> %v2, float %c, i64 2
  %v4 = insertelement <4 x float> %v3, float %d, i64 3
  ret <4 x float> %v4
}

define <8 x float> @buildvec_v8f32(float %e0, float %e1, float %e2, float %e3, float %e4, float %e5, float %e6, float %e7) {
; CHECK-LABEL: buildvec_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: ret
  %v0 = insertelement <8 x float> poison, float %e0, i64 0
  %v1 = insertelement <8 x float> %v0, float %e1, i64 1
  %v2 = insertelement <8 x float> %v1, float %e2, i64 2
  %v3 = insertelement <8 x float> %v2, float %e3, i64 3
  %v4 = insertelement <8 x float> %v3, float %e4, i64 4
  %v5 = insertelement <8 x float> %v4, float %e5, i64 5
  %v6 = insertelement <8 x float> %v5, float %e6, i64 6
  %v7 = insertelement <8 x float> %v6, float %e7, i64 7
  ret <8 x float> %v7
}

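; With 16 float arguments only the first 8 fit in fa0-fa7; the rest arrive in
; GPRs. The expected lowering below goes through the stack: every element is
; stored to an aligned slot and the whole vector is read back with one vle32.v.
; On RV64 the GPR-passed floats are first moved to FP registers with fmv.w.x,
; while RV32 stores the raw GPR bits directly with sw.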
define <16 x float> @buildvec_v16f32(float %e0, float %e1, float %e2, float %e3, float %e4, float %e5, float %e6, float %e7, float %e8, float %e9, float %e10, float %e11, float %e12, float %e13, float %e14, float %e15) {
; RV32-LABEL: buildvec_v16f32:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -128
; RV32-NEXT: .cfi_def_cfa_offset 128
; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: addi s0, sp, 128
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: andi sp, sp, -64
; RV32-NEXT: sw a7, 60(sp)
; RV32-NEXT: sw a6, 56(sp)
; RV32-NEXT: sw a5, 52(sp)
; RV32-NEXT: sw a4, 48(sp)
; RV32-NEXT: sw a3, 44(sp)
; RV32-NEXT: sw a2, 40(sp)
; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a0, 32(sp)
; RV32-NEXT: fsw fa7, 28(sp)
; RV32-NEXT: fsw fa6, 24(sp)
; RV32-NEXT: fsw fa5, 20(sp)
; RV32-NEXT: fsw fa4, 16(sp)
; RV32-NEXT: fsw fa3, 12(sp)
; RV32-NEXT: fsw fa2, 8(sp)
; RV32-NEXT: fsw fa1, 4(sp)
; RV32-NEXT: fsw fa0, 0(sp)
; RV32-NEXT: mv a0, sp
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: addi sp, s0, -128
; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 128
; RV32-NEXT: ret
;
; RV64-LABEL: buildvec_v16f32:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -128
; RV64-NEXT: .cfi_def_cfa_offset 128
; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, sp, 128
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: andi sp, sp, -64
; RV64-NEXT: fmv.w.x ft0, a0
; RV64-NEXT: fmv.w.x ft1, a1
; RV64-NEXT: fmv.w.x ft2, a2
; RV64-NEXT: fmv.w.x ft3, a3
; RV64-NEXT: fmv.w.x ft4, a4
; RV64-NEXT: fmv.w.x ft5, a5
; RV64-NEXT: fmv.w.x ft6, a6
; RV64-NEXT: fmv.w.x ft7, a7
; RV64-NEXT: fsw fa7, 28(sp)
; RV64-NEXT: fsw fa6, 24(sp)
; RV64-NEXT: fsw fa5, 20(sp)
; RV64-NEXT: fsw fa4, 16(sp)
; RV64-NEXT: fsw fa3, 12(sp)
; RV64-NEXT: fsw fa2, 8(sp)
; RV64-NEXT: fsw fa1, 4(sp)
; RV64-NEXT: fsw fa0, 0(sp)
; RV64-NEXT: fsw ft7, 60(sp)
; RV64-NEXT: fsw ft6, 56(sp)
; RV64-NEXT: fsw ft5, 52(sp)
; RV64-NEXT: fsw ft4, 48(sp)
; RV64-NEXT: fsw ft3, 44(sp)
; RV64-NEXT: fsw ft2, 40(sp)
; RV64-NEXT: fsw ft1, 36(sp)
; RV64-NEXT: fsw ft0, 32(sp)
; RV64-NEXT: mv a0, sp
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: addi sp, s0, -128
; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 128
; RV64-NEXT: ret
  %v0 = insertelement <16 x float> poison, float %e0, i64 0
  %v1 = insertelement <16 x float> %v0, float %e1, i64 1
  %v2 = insertelement <16 x float> %v1, float %e2, i64 2
  %v3 = insertelement <16 x float> %v2, float %e3, i64 3
  %v4 = insertelement <16 x float> %v3, float %e4, i64 4
  %v5 = insertelement <16 x float> %v4, float %e5, i64 5
  %v6 = insertelement <16 x float> %v5, float %e6, i64 6
  %v7 = insertelement <16 x float> %v6, float %e7, i64 7
  %v8 = insertelement <16 x float> %v7, float %e8, i64 8
  %v9 = insertelement <16 x float> %v8, float %e9, i64 9
  %v10 = insertelement <16 x float> %v9, float %e10, i64 10
  %v11 = insertelement <16 x float> %v10, float %e11, i64 11
  %v12 = insertelement <16 x float> %v11, float %e12, i64 12
  %v13 = insertelement <16 x float> %v12, float %e13, i64 13
  %v14 = insertelement <16 x float> %v13, float %e14, i64 14
  %v15 = insertelement <16 x float> %v14, float %e15, i64 15
  ret <16 x float> %v15
}

define <32 x float> @buildvec_v32f32(float %e0, float %e1, float %e2, float %e3, float %e4, float %e5, float %e6, float %e7, float %e8, float %e9, float %e10, float %e11, float %e12, float %e13, float %e14, float %e15, float %e16, float %e17, float %e18, float %e19, float %e20, float %e21, float %e22, float %e23, float %e24, float %e25, float %e26, float %e27, float %e28, float %e29, float %e30, float %e31) {
; RV32-LABEL: buildvec_v32f32:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -256
; RV32-NEXT: .cfi_def_cfa_offset 256
; RV32-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 248(sp) # 4-byte Folded Spill
; RV32-NEXT: fsd fs0, 240(sp) # 8-byte Folded Spill
; RV32-NEXT: fsd fs1, 232(sp) # 8-byte Folded Spill
; RV32-NEXT: fsd fs2, 224(sp) # 8-byte Folded Spill
; RV32-NEXT: fsd fs3, 216(sp) # 8-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: .cfi_offset fs0, -16
; RV32-NEXT: .cfi_offset fs1, -24
; RV32-NEXT: .cfi_offset fs2, -32
; RV32-NEXT: .cfi_offset fs3, -40
; RV32-NEXT: addi s0, sp, 256
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: andi sp, sp, -128
; RV32-NEXT: flw ft0, 0(s0)
; RV32-NEXT: flw ft1, 4(s0)
; RV32-NEXT: flw ft2, 8(s0)
; RV32-NEXT: flw ft3, 12(s0)
; RV32-NEXT: flw ft4, 16(s0)
; RV32-NEXT: flw ft5, 20(s0)
; RV32-NEXT: flw ft6, 24(s0)
; RV32-NEXT: flw ft7, 28(s0)
; RV32-NEXT: flw ft8, 32(s0)
; RV32-NEXT: flw ft9, 36(s0)
; RV32-NEXT: flw ft10, 40(s0)
; RV32-NEXT: flw ft11, 44(s0)
; RV32-NEXT: flw fs0, 60(s0)
; RV32-NEXT: flw fs1, 56(s0)
; RV32-NEXT: flw fs2, 52(s0)
; RV32-NEXT: flw fs3, 48(s0)
; RV32-NEXT: fsw fs0, 124(sp)
; RV32-NEXT: fsw fs1, 120(sp)
; RV32-NEXT: fsw fs2, 116(sp)
; RV32-NEXT: fsw fs3, 112(sp)
; RV32-NEXT: fsw ft11, 108(sp)
; RV32-NEXT: fsw ft10, 104(sp)
; RV32-NEXT: fsw ft9, 100(sp)
; RV32-NEXT: fsw ft8, 96(sp)
; RV32-NEXT: fsw ft7, 92(sp)
; RV32-NEXT: fsw ft6, 88(sp)
; RV32-NEXT: fsw ft5, 84(sp)
; RV32-NEXT: fsw ft4, 80(sp)
; RV32-NEXT: fsw ft3, 76(sp)
; RV32-NEXT: fsw ft2, 72(sp)
; RV32-NEXT: fsw ft1, 68(sp)
; RV32-NEXT: fsw ft0, 64(sp)
; RV32-NEXT: sw a7, 60(sp)
; RV32-NEXT: sw a6, 56(sp)
; RV32-NEXT: sw a5, 52(sp)
; RV32-NEXT: sw a4, 48(sp)
; RV32-NEXT: sw a3, 44(sp)
; RV32-NEXT: sw a2, 40(sp)
; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a0, 32(sp)
; RV32-NEXT: fsw fa7, 28(sp)
; RV32-NEXT: fsw fa6, 24(sp)
; RV32-NEXT: fsw fa5, 20(sp)
; RV32-NEXT: fsw fa4, 16(sp)
; RV32-NEXT: fsw fa3, 12(sp)
; RV32-NEXT: fsw fa2, 8(sp)
; RV32-NEXT: fsw fa1, 4(sp)
; RV32-NEXT: fsw fa0, 0(sp)
; RV32-NEXT: li a0, 32
; RV32-NEXT: mv a1, sp
; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; RV32-NEXT: vle32.v v8, (a1)
; RV32-NEXT: addi sp, s0, -256
; RV32-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 248(sp) # 4-byte Folded Reload
; RV32-NEXT: fld fs0, 240(sp) # 8-byte Folded Reload
; RV32-NEXT: fld fs1, 232(sp) # 8-byte Folded Reload
; RV32-NEXT: fld fs2, 224(sp) # 8-byte Folded Reload
; RV32-NEXT: fld fs3, 216(sp) # 8-byte Folded Reload
; RV32-NEXT: addi sp, sp, 256
; RV32-NEXT: ret
;
; RV64-LABEL: buildvec_v32f32:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -256
; RV64-NEXT: .cfi_def_cfa_offset 256
; RV64-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs0, 232(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs1, 224(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs2, 216(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs3, 208(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs4, 200(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs5, 192(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs6, 184(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs7, 176(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs8, 168(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs9, 160(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs10, 152(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs11, 144(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: .cfi_offset fs0, -24
; RV64-NEXT: .cfi_offset fs1, -32
; RV64-NEXT: .cfi_offset fs2, -40
; RV64-NEXT: .cfi_offset fs3, -48
; RV64-NEXT: .cfi_offset fs4, -56
; RV64-NEXT: .cfi_offset fs5, -64
; RV64-NEXT: .cfi_offset fs6, -72
; RV64-NEXT: .cfi_offset fs7, -80
; RV64-NEXT: .cfi_offset fs8, -88
; RV64-NEXT: .cfi_offset fs9, -96
; RV64-NEXT: .cfi_offset fs10, -104
; RV64-NEXT: .cfi_offset fs11, -112
; RV64-NEXT: addi s0, sp, 256
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: andi sp, sp, -128
; RV64-NEXT: fmv.w.x ft0, a0
; RV64-NEXT: fmv.w.x ft1, a1
; RV64-NEXT: fmv.w.x ft2, a2
; RV64-NEXT: fmv.w.x ft3, a3
; RV64-NEXT: fmv.w.x ft4, a4
; RV64-NEXT: fmv.w.x ft5, a5
; RV64-NEXT: fmv.w.x ft6, a6
; RV64-NEXT: fmv.w.x ft7, a7
; RV64-NEXT: flw ft8, 0(s0)
; RV64-NEXT: flw ft9, 8(s0)
; RV64-NEXT: flw ft10, 16(s0)
; RV64-NEXT: flw ft11, 24(s0)
; RV64-NEXT: flw fs0, 32(s0)
; RV64-NEXT: flw fs1, 40(s0)
; RV64-NEXT: flw fs2, 48(s0)
; RV64-NEXT: flw fs3, 56(s0)
; RV64-NEXT: flw fs4, 64(s0)
; RV64-NEXT: flw fs5, 72(s0)
; RV64-NEXT: flw fs6, 80(s0)
; RV64-NEXT: flw fs7, 88(s0)
; RV64-NEXT: flw fs8, 120(s0)
; RV64-NEXT: flw fs9, 112(s0)
; RV64-NEXT: flw fs10, 104(s0)
; RV64-NEXT: flw fs11, 96(s0)
; RV64-NEXT: fsw fs8, 124(sp)
; RV64-NEXT: fsw fs9, 120(sp)
; RV64-NEXT: fsw fs10, 116(sp)
; RV64-NEXT: fsw fs11, 112(sp)
; RV64-NEXT: fsw fs7, 108(sp)
; RV64-NEXT: fsw fs6, 104(sp)
; RV64-NEXT: fsw fs5, 100(sp)
; RV64-NEXT: fsw fs4, 96(sp)
; RV64-NEXT: fsw fs3, 92(sp)
; RV64-NEXT: fsw fs2, 88(sp)
; RV64-NEXT: fsw fs1, 84(sp)
; RV64-NEXT: fsw fs0, 80(sp)
; RV64-NEXT: fsw ft11, 76(sp)
; RV64-NEXT: fsw ft10, 72(sp)
; RV64-NEXT: fsw ft9, 68(sp)
; RV64-NEXT: fsw ft8, 64(sp)
; RV64-NEXT: fsw fa7, 28(sp)
; RV64-NEXT: fsw fa6, 24(sp)
; RV64-NEXT: fsw fa5, 20(sp)
; RV64-NEXT: fsw fa4, 16(sp)
; RV64-NEXT: fsw fa3, 12(sp)
; RV64-NEXT: fsw fa2, 8(sp)
; RV64-NEXT: fsw fa1, 4(sp)
; RV64-NEXT: fsw fa0, 0(sp)
; RV64-NEXT: fsw ft7, 60(sp)
; RV64-NEXT: fsw ft6, 56(sp)
; RV64-NEXT: fsw ft5, 52(sp)
; RV64-NEXT: fsw ft4, 48(sp)
; RV64-NEXT: fsw ft3, 44(sp)
; RV64-NEXT: fsw ft2, 40(sp)
; RV64-NEXT: fsw ft1, 36(sp)
; RV64-NEXT: fsw ft0, 32(sp)
; RV64-NEXT: li a0, 32
; RV64-NEXT: mv a1, sp
; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; RV64-NEXT: vle32.v v8, (a1)
; RV64-NEXT: addi sp, s0, -256
; RV64-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs0, 232(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs1, 224(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs2, 216(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs3, 208(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs4, 200(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs5, 192(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs6, 184(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs7, 176(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs8, 168(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs9, 160(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs10, 152(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs11, 144(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 256
; RV64-NEXT: ret
  %v0 = insertelement <32 x float> poison, float %e0, i64 0
  %v1 = insertelement <32 x float> %v0, float %e1, i64 1
  %v2 = insertelement <32 x float> %v1, float %e2, i64 2
  %v3 = insertelement <32 x float> %v2, float %e3, i64 3
  %v4 = insertelement <32 x float> %v3, float %e4, i64 4
  %v5 = insertelement <32 x float> %v4, float %e5, i64 5
  %v6 = insertelement <32 x float> %v5, float %e6, i64 6
  %v7 = insertelement <32 x float> %v6, float %e7, i64 7
  %v8 = insertelement <32 x float> %v7, float %e8, i64 8
  %v9 = insertelement <32 x float> %v8, float %e9, i64 9
  %v10 = insertelement <32 x float> %v9, float %e10, i64 10
  %v11 = insertelement <32 x float> %v10, float %e11, i64 11
  %v12 = insertelement <32 x float> %v11, float %e12, i64 12
  %v13 = insertelement <32 x float> %v12, float %e13, i64 13
  %v14 = insertelement <32 x float> %v13, float %e14, i64 14
  %v15 = insertelement <32 x float> %v14, float %e15, i64 15
  %v16 = insertelement <32 x float> %v15, float %e16, i64 16
  %v17 = insertelement <32 x float> %v16, float %e17, i64 17
  %v18 = insertelement <32 x float> %v17, float %e18, i64 18
  %v19 = insertelement <32 x float> %v18, float %e19, i64 19
  %v20 = insertelement <32 x float> %v19, float %e20, i64 20
  %v21 = insertelement <32 x float> %v20, float %e21, i64 21
  %v22 = insertelement <32 x float> %v21, float %e22, i64 22
  %v23 = insertelement <32 x float> %v22, float %e23, i64 23
  %v24 = insertelement <32 x float> %v23, float %e24, i64 24
  %v25 = insertelement <32 x float> %v24, float %e25, i64 25
  %v26 = insertelement <32 x float> %v25, float %e26, i64 26
  %v27 = insertelement <32 x float> %v26, float %e27, i64 27
  %v28 = insertelement <32 x float> %v27, float %e28, i64 28
  %v29 = insertelement <32 x float> %v28, float %e29, i64 29
  %v30 = insertelement <32 x float> %v29, float %e30, i64 30
  %v31 = insertelement <32 x float> %v30, float %e31, i64 31
  ret <32 x float> %v31
}

define <8 x double> @buildvec_v8f64(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7) {
; RV32-LABEL: buildvec_v8f64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -128
; RV32-NEXT: .cfi_def_cfa_offset 128
; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: addi s0, sp, 128
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: andi sp, sp, -64
; RV32-NEXT: fsd fa7, 56(sp)
; RV32-NEXT: fsd fa6, 48(sp)
; RV32-NEXT: fsd fa5, 40(sp)
; RV32-NEXT: fsd fa4, 32(sp)
; RV32-NEXT: fsd fa3, 24(sp)
; RV32-NEXT: fsd fa2, 16(sp)
; RV32-NEXT: fsd fa1, 8(sp)
; RV32-NEXT: fsd fa0, 0(sp)
; RV32-NEXT: mv a0, sp
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: addi sp, s0, -128
; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 128
; RV32-NEXT: ret
;
; RV64-LABEL: buildvec_v8f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -128
; RV64-NEXT: .cfi_def_cfa_offset 128
; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, sp, 128
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: andi sp, sp, -64
; RV64-NEXT: fsd fa7, 56(sp)
; RV64-NEXT: fsd fa6, 48(sp)
; RV64-NEXT: fsd fa5, 40(sp)
; RV64-NEXT: fsd fa4, 32(sp)
; RV64-NEXT: fsd fa3, 24(sp)
; RV64-NEXT: fsd fa2, 16(sp)
; RV64-NEXT: fsd fa1, 8(sp)
; RV64-NEXT: fsd fa0, 0(sp)
; RV64-NEXT: mv a0, sp
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: addi sp, s0, -128
; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 128
; RV64-NEXT: ret
  %v0 = insertelement <8 x double> poison, double %e0, i64 0
  %v1 = insertelement <8 x double> %v0, double %e1, i64 1
  %v2 = insertelement <8 x double> %v1, double %e2, i64 2
  %v3 = insertelement <8 x double> %v2, double %e3, i64 3
  %v4 = insertelement <8 x double> %v3, double %e4, i64 4
  %v5 = insertelement <8 x double> %v4, double %e5, i64 5
  %v6 = insertelement <8 x double> %v5, double %e6, i64 6
  %v7 = insertelement <8 x double> %v6, double %e7, i64 7
  ret <8 x double> %v7
}

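; On RV32, double arguments beyond fa0-fa7 are passed split across GPR pairs
; (and the stack). In the checks below each pair is first stored with two sw
; instructions and reloaded as one double with fld before the vector is
; assembled on the stack and loaded with vle64.v.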
define <16 x double> @buildvec_v16f64(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7, double %e8, double %e9, double %e10, double %e11, double %e12, double %e13, double %e14, double %e15) {
; RV32-LABEL: buildvec_v16f64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -384
; RV32-NEXT: .cfi_def_cfa_offset 384
; RV32-NEXT: sw ra, 380(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: addi s0, sp, 384
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: andi sp, sp, -128
; RV32-NEXT: sw a0, 120(sp)
; RV32-NEXT: sw a1, 124(sp)
; RV32-NEXT: fld ft0, 120(sp)
; RV32-NEXT: sw a2, 120(sp)
; RV32-NEXT: sw a3, 124(sp)
; RV32-NEXT: fld ft1, 120(sp)
; RV32-NEXT: sw a4, 120(sp)
; RV32-NEXT: sw a5, 124(sp)
; RV32-NEXT: fld ft2, 120(sp)
; RV32-NEXT: sw a6, 120(sp)
; RV32-NEXT: sw a7, 124(sp)
; RV32-NEXT: fld ft3, 120(sp)
; RV32-NEXT: fld ft4, 24(s0)
; RV32-NEXT: fld ft5, 16(s0)
; RV32-NEXT: fld ft6, 8(s0)
; RV32-NEXT: fld ft7, 0(s0)
; RV32-NEXT: fsd ft4, 248(sp)
; RV32-NEXT: fsd ft5, 240(sp)
; RV32-NEXT: fsd ft6, 232(sp)
; RV32-NEXT: fsd ft7, 224(sp)
; RV32-NEXT: fsd fa7, 184(sp)
; RV32-NEXT: fsd fa6, 176(sp)
; RV32-NEXT: fsd fa5, 168(sp)
; RV32-NEXT: fsd fa4, 160(sp)
; RV32-NEXT: fsd fa3, 152(sp)
; RV32-NEXT: fsd fa2, 144(sp)
; RV32-NEXT: fsd fa1, 136(sp)
; RV32-NEXT: fsd fa0, 128(sp)
; RV32-NEXT: fsd ft3, 216(sp)
; RV32-NEXT: fsd ft2, 208(sp)
; RV32-NEXT: fsd ft1, 200(sp)
; RV32-NEXT: fsd ft0, 192(sp)
; RV32-NEXT: addi a0, sp, 128
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: addi sp, s0, -384
; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 384
; RV32-NEXT: ret
;
; RV64-LABEL: buildvec_v16f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -256
; RV64-NEXT: .cfi_def_cfa_offset 256
; RV64-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, sp, 256
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: andi sp, sp, -128
; RV64-NEXT: sd a7, 120(sp)
; RV64-NEXT: sd a6, 112(sp)
; RV64-NEXT: sd a5, 104(sp)
; RV64-NEXT: sd a4, 96(sp)
; RV64-NEXT: sd a3, 88(sp)
; RV64-NEXT: sd a2, 80(sp)
; RV64-NEXT: sd a1, 72(sp)
; RV64-NEXT: sd a0, 64(sp)
; RV64-NEXT: fsd fa7, 56(sp)
; RV64-NEXT: fsd fa6, 48(sp)
; RV64-NEXT: fsd fa5, 40(sp)
; RV64-NEXT: fsd fa4, 32(sp)
; RV64-NEXT: fsd fa3, 24(sp)
; RV64-NEXT: fsd fa2, 16(sp)
; RV64-NEXT: fsd fa1, 8(sp)
; RV64-NEXT: fsd fa0, 0(sp)
; RV64-NEXT: mv a0, sp
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: addi sp, s0, -256
; RV64-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 256
; RV64-NEXT: ret
  %v0 = insertelement <16 x double> poison, double %e0, i64 0
  %v1 = insertelement <16 x double> %v0, double %e1, i64 1
  %v2 = insertelement <16 x double> %v1, double %e2, i64 2
  %v3 = insertelement <16 x double> %v2, double %e3, i64 3
  %v4 = insertelement <16 x double> %v3, double %e4, i64 4
  %v5 = insertelement <16 x double> %v4, double %e5, i64 5
  %v6 = insertelement <16 x double> %v5, double %e6, i64 6
  %v7 = insertelement <16 x double> %v6, double %e7, i64 7
  %v8 = insertelement <16 x double> %v7, double %e8, i64 8
  %v9 = insertelement <16 x double> %v8, double %e9, i64 9
  %v10 = insertelement <16 x double> %v9, double %e10, i64 10
  %v11 = insertelement <16 x double> %v10, double %e11, i64 11
  %v12 = insertelement <16 x double> %v11, double %e12, i64 12
  %v13 = insertelement <16 x double> %v12, double %e13, i64 13
  %v14 = insertelement <16 x double> %v13, double %e14, i64 14
  %v15 = insertelement <16 x double> %v14, double %e15, i64 15
  ret <16 x double> %v15
}

define <32 x double> @buildvec_v32f64(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7, double %e8, double %e9, double %e10, double %e11, double %e12, double %e13, double %e14, double %e15, double %e16, double %e17, double %e18, double %e19, double %e20, double %e21, double %e22, double %e23, double %e24, double %e25, double %e26, double %e27, double %e28, double %e29, double %e30, double %e31) {
; RV32-LABEL: buildvec_v32f64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -512
; RV32-NEXT: .cfi_def_cfa_offset 512
; RV32-NEXT: sw ra, 508(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 504(sp) # 4-byte Folded Spill
; RV32-NEXT: fsd fs0, 496(sp) # 8-byte Folded Spill
; RV32-NEXT: fsd fs1, 488(sp) # 8-byte Folded Spill
; RV32-NEXT: fsd fs2, 480(sp) # 8-byte Folded Spill
; RV32-NEXT: fsd fs3, 472(sp) # 8-byte Folded Spill
; RV32-NEXT: fsd fs4, 464(sp) # 8-byte Folded Spill
; RV32-NEXT: fsd fs5, 456(sp) # 8-byte Folded Spill
; RV32-NEXT: fsd fs6, 448(sp) # 8-byte Folded Spill
; RV32-NEXT: fsd fs7, 440(sp) # 8-byte Folded Spill
; RV32-NEXT: fsd fs8, 432(sp) # 8-byte Folded Spill
; RV32-NEXT: fsd fs9, 424(sp) # 8-byte Folded Spill
; RV32-NEXT: fsd fs10, 416(sp) # 8-byte Folded Spill
; RV32-NEXT: fsd fs11, 408(sp) # 8-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: .cfi_offset fs0, -16
; RV32-NEXT: .cfi_offset fs1, -24
; RV32-NEXT: .cfi_offset fs2, -32
; RV32-NEXT: .cfi_offset fs3, -40
; RV32-NEXT: .cfi_offset fs4, -48
; RV32-NEXT: .cfi_offset fs5, -56
; RV32-NEXT: .cfi_offset fs6, -64
; RV32-NEXT: .cfi_offset fs7, -72
; RV32-NEXT: .cfi_offset fs8, -80
; RV32-NEXT: .cfi_offset fs9, -88
; RV32-NEXT: .cfi_offset fs10, -96
; RV32-NEXT: .cfi_offset fs11, -104
; RV32-NEXT: addi s0, sp, 512
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: andi sp, sp, -128
; RV32-NEXT: sw a0, 120(sp)
; RV32-NEXT: sw a1, 124(sp)
; RV32-NEXT: fld ft0, 120(sp)
; RV32-NEXT: sw a2, 120(sp)
; RV32-NEXT: sw a3, 124(sp)
; RV32-NEXT: fld ft1, 120(sp)
; RV32-NEXT: sw a4, 120(sp)
; RV32-NEXT: sw a5, 124(sp)
; RV32-NEXT: fld ft2, 120(sp)
; RV32-NEXT: sw a6, 120(sp)
; RV32-NEXT: sw a7, 124(sp)
; RV32-NEXT: fld ft3, 120(sp)
; RV32-NEXT: fld ft4, 0(s0)
; RV32-NEXT: fld ft5, 8(s0)
; RV32-NEXT: fld ft6, 16(s0)
; RV32-NEXT: fld ft7, 24(s0)
; RV32-NEXT: fld ft8, 32(s0)
; RV32-NEXT: fld ft9, 40(s0)
; RV32-NEXT: fld ft10, 48(s0)
; RV32-NEXT: fld ft11, 56(s0)
; RV32-NEXT: fld fs0, 64(s0)
; RV32-NEXT: fld fs1, 72(s0)
; RV32-NEXT: fld fs2, 80(s0)
; RV32-NEXT: fld fs3, 88(s0)
; RV32-NEXT: fld fs4, 96(s0)
; RV32-NEXT: fld fs5, 104(s0)
; RV32-NEXT: fld fs6, 112(s0)
; RV32-NEXT: fld fs7, 120(s0)
; RV32-NEXT: fld fs8, 152(s0)
; RV32-NEXT: fld fs9, 144(s0)
; RV32-NEXT: fld fs10, 136(s0)
; RV32-NEXT: fld fs11, 128(s0)
; RV32-NEXT: fsd fs8, 248(sp)
; RV32-NEXT: fsd fs9, 240(sp)
; RV32-NEXT: fsd fs10, 232(sp)
; RV32-NEXT: fsd fs11, 224(sp)
; RV32-NEXT: fsd fs7, 216(sp)
; RV32-NEXT: fsd fs6, 208(sp)
; RV32-NEXT: fsd fs5, 200(sp)
; RV32-NEXT: fsd fs4, 192(sp)
; RV32-NEXT: fsd fs3, 184(sp)
; RV32-NEXT: fsd fs2, 176(sp)
; RV32-NEXT: fsd fs1, 168(sp)
; RV32-NEXT: fsd fs0, 160(sp)
; RV32-NEXT: fsd ft11, 152(sp)
; RV32-NEXT: fsd ft10, 144(sp)
; RV32-NEXT: fsd ft9, 136(sp)
; RV32-NEXT: fsd ft8, 128(sp)
; RV32-NEXT: fsd ft7, 376(sp)
; RV32-NEXT: fsd ft6, 368(sp)
; RV32-NEXT: fsd ft5, 360(sp)
; RV32-NEXT: fsd ft4, 352(sp)
; RV32-NEXT: fsd fa7, 312(sp)
; RV32-NEXT: fsd fa6, 304(sp)
; RV32-NEXT: fsd fa5, 296(sp)
; RV32-NEXT: fsd fa4, 288(sp)
; RV32-NEXT: fsd fa3, 280(sp)
; RV32-NEXT: fsd fa2, 272(sp)
; RV32-NEXT: fsd fa1, 264(sp)
; RV32-NEXT: fsd fa0, 256(sp)
; RV32-NEXT: fsd ft3, 344(sp)
; RV32-NEXT: fsd ft2, 336(sp)
; RV32-NEXT: fsd ft1, 328(sp)
; RV32-NEXT: fsd ft0, 320(sp)
; RV32-NEXT: addi a0, sp, 128
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vle64.v v16, (a0)
; RV32-NEXT: addi a0, sp, 256
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: addi sp, s0, -512
; RV32-NEXT: lw ra, 508(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 504(sp) # 4-byte Folded Reload
; RV32-NEXT: fld fs0, 496(sp) # 8-byte Folded Reload
; RV32-NEXT: fld fs1, 488(sp) # 8-byte Folded Reload
; RV32-NEXT: fld fs2, 480(sp) # 8-byte Folded Reload
; RV32-NEXT: fld fs3, 472(sp) # 8-byte Folded Reload
; RV32-NEXT: fld fs4, 464(sp) # 8-byte Folded Reload
; RV32-NEXT: fld fs5, 456(sp) # 8-byte Folded Reload
; RV32-NEXT: fld fs6, 448(sp) # 8-byte Folded Reload
; RV32-NEXT: fld fs7, 440(sp) # 8-byte Folded Reload
; RV32-NEXT: fld fs8, 432(sp) # 8-byte Folded Reload
; RV32-NEXT: fld fs9, 424(sp) # 8-byte Folded Reload
; RV32-NEXT: fld fs10, 416(sp) # 8-byte Folded Reload
; RV32-NEXT: fld fs11, 408(sp) # 8-byte Folded Reload
; RV32-NEXT: addi sp, sp, 512
; RV32-NEXT: ret
;
; RV64-LABEL: buildvec_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -384
; RV64-NEXT: .cfi_def_cfa_offset 384
; RV64-NEXT: sd ra, 376(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs0, 360(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs1, 352(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs2, 344(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs3, 336(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: .cfi_offset fs0, -24
; RV64-NEXT: .cfi_offset fs1, -32
; RV64-NEXT: .cfi_offset fs2, -40
; RV64-NEXT: .cfi_offset fs3, -48
; RV64-NEXT: addi s0, sp, 384
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: andi sp, sp, -128
; RV64-NEXT: fld ft0, 0(s0)
; RV64-NEXT: fld ft1, 8(s0)
; RV64-NEXT: fld ft2, 16(s0)
; RV64-NEXT: fld ft3, 24(s0)
; RV64-NEXT: fld ft4, 32(s0)
; RV64-NEXT: fld ft5, 40(s0)
; RV64-NEXT: fld ft6, 48(s0)
; RV64-NEXT: fld ft7, 56(s0)
; RV64-NEXT: fld ft8, 64(s0)
; RV64-NEXT: fld ft9, 72(s0)
; RV64-NEXT: fld ft10, 80(s0)
; RV64-NEXT: fld ft11, 88(s0)
; RV64-NEXT: fld fs0, 96(s0)
; RV64-NEXT: fld fs1, 104(s0)
; RV64-NEXT: fld fs2, 112(s0)
; RV64-NEXT: fld fs3, 120(s0)
; RV64-NEXT: sd a7, 248(sp)
; RV64-NEXT: sd a6, 240(sp)
; RV64-NEXT: sd a5, 232(sp)
; RV64-NEXT: sd a4, 224(sp)
; RV64-NEXT: sd a3, 216(sp)
; RV64-NEXT: sd a2, 208(sp)
; RV64-NEXT: sd a1, 200(sp)
; RV64-NEXT: sd a0, 192(sp)
; RV64-NEXT: fsd fa7, 184(sp)
; RV64-NEXT: fsd fa6, 176(sp)
; RV64-NEXT: fsd fa5, 168(sp)
; RV64-NEXT: fsd fa4, 160(sp)
; RV64-NEXT: fsd fa3, 152(sp)
; RV64-NEXT: fsd fa2, 144(sp)
; RV64-NEXT: fsd fa1, 136(sp)
; RV64-NEXT: fsd fa0, 128(sp)
; RV64-NEXT: fsd fs3, 120(sp)
; RV64-NEXT: fsd fs2, 112(sp)
; RV64-NEXT: fsd fs1, 104(sp)
; RV64-NEXT: fsd fs0, 96(sp)
; RV64-NEXT: fsd ft11, 88(sp)
; RV64-NEXT: fsd ft10, 80(sp)
; RV64-NEXT: fsd ft9, 72(sp)
; RV64-NEXT: fsd ft8, 64(sp)
; RV64-NEXT: fsd ft7, 56(sp)
; RV64-NEXT: fsd ft6, 48(sp)
; RV64-NEXT: fsd ft5, 40(sp)
; RV64-NEXT: fsd ft4, 32(sp)
; RV64-NEXT: fsd ft3, 24(sp)
; RV64-NEXT: fsd ft2, 16(sp)
; RV64-NEXT: fsd ft1, 8(sp)
; RV64-NEXT: fsd ft0, 0(sp)
; RV64-NEXT: addi a0, sp, 128
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: mv a0, sp
; RV64-NEXT: vle64.v v16, (a0)
; RV64-NEXT: addi sp, s0, -384
; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs0, 360(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs1, 352(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs2, 344(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs3, 336(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 384
; RV64-NEXT: ret
  %v0 = insertelement <32 x double> poison, double %e0, i64 0
  %v1 = insertelement <32 x double> %v0, double %e1, i64 1
  %v2 = insertelement <32 x double> %v1, double %e2, i64 2
  %v3 = insertelement <32 x double> %v2, double %e3, i64 3
  %v4 = insertelement <32 x double> %v3, double %e4, i64 4
  %v5 = insertelement <32 x double> %v4, double %e5, i64 5
  %v6 = insertelement <32 x double> %v5, double %e6, i64 6
  %v7 = insertelement <32 x double> %v6, double %e7, i64 7
  %v8 = insertelement <32 x double> %v7, double %e8, i64 8
  %v9 = insertelement <32 x double> %v8, double %e9, i64 9
  %v10 = insertelement <32 x double> %v9, double %e10, i64 10
  %v11 = insertelement <32 x double> %v10, double %e11, i64 11
  %v12 = insertelement <32 x double> %v11, double %e12, i64 12
  %v13 = insertelement <32 x double> %v12, double %e13, i64 13
  %v14 = insertelement <32 x double> %v13, double %e14, i64 14
  %v15 = insertelement <32 x double> %v14, double %e15, i64 15
  %v16 = insertelement <32 x double> %v15, double %e16, i64 16
  %v17 = insertelement <32 x double> %v16, double %e17, i64 17
  %v18 = insertelement <32 x double> %v17, double %e18, i64 18
  %v19 = insertelement <32 x double> %v18, double %e19, i64 19
  %v20 = insertelement <32 x double> %v19, double %e20, i64 20
  %v21 = insertelement <32 x double> %v20, double %e21, i64 21
  %v22 = insertelement <32 x double> %v21, double %e22, i64 22
  %v23 = insertelement <32 x double> %v22, double %e23, i64 23
  %v24 = insertelement <32 x double> %v23, double %e24, i64 24
  %v25 = insertelement <32 x double> %v24, double %e25, i64 25
  %v26 = insertelement <32 x double> %v25, double %e26, i64 26
  %v27 = insertelement <32 x double> %v26, double %e27, i64 27
  %v28 = insertelement <32 x double> %v27, double %e28, i64 28
  %v29 = insertelement <32 x double> %v28, double %e29, i64 29
  %v30 = insertelement <32 x double> %v29, double %e30, i64 30
  %v31 = insertelement <32 x double> %v30, double %e31, i64 31
  ret <32 x double> %v31
}

; FIXME: These constants have enough sign bits that we could use vmv.v.x/i and
; vsext, but we don't support this for FP yet.
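; Note: 0x36A0000000000000 is the IR (double) spelling of 2^-149, the smallest
; positive single-precision denormal, whose 32-bit pattern is 0x00000001; the
; element bit patterns {0x1, 0x0} would sign-extend from a much narrower
; integer vector, which is what the FIXME refers to.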
define <2 x float> @signbits() {
  ret <2 x float> <float 0x36A0000000000000, float 0.000000e+00>
}

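; The vid_* tests below check that FP constant vectors forming small integer
; affine sequences are materialized with a vid.v step vector, simple integer
; adds, and vfcvt.f.x.v instead of a constant-pool load; the "denominator2"
; cases contain non-integer values and still load from the constant pool.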
define <2 x half> @vid_v2f16() {
; CHECK-LABEL: vid_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vfcvt.f.x.v v8, v8
; CHECK-NEXT: ret
  ret <2 x half> <half 0.0, half 1.0>
}

define <2 x half> @vid_addend1_v2f16() {
; CHECK-LABEL: vid_addend1_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: vfcvt.f.x.v v8, v8
; CHECK-NEXT: ret
  ret <2 x half> <half 1.0, half 2.0>
}

define <2 x half> @vid_denominator2_v2f16() {
; CHECK-LABEL: vid_denominator2_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI27_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI27_0)
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: ret
  ret <2 x half> <half 0.5, half 1.0>
}

define <2 x half> @vid_step2_v2f16() {
; CHECK-LABEL: vid_step2_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vfcvt.f.x.v v8, v8
; CHECK-NEXT: ret
  ret <2 x half> <half 0.0, half 2.0>
}

define <2 x float> @vid_v2f32() {
; CHECK-LABEL: vid_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vfcvt.f.x.v v8, v8
; CHECK-NEXT: ret
  ret <2 x float> <float 0.0, float 1.0>
}

define <2 x float> @vid_addend1_v2f32() {
; CHECK-LABEL: vid_addend1_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: vfcvt.f.x.v v8, v8
; CHECK-NEXT: ret
  ret <2 x float> <float 1.0, float 2.0>
}

define <2 x float> @vid_denominator2_v2f32() {
; CHECK-LABEL: vid_denominator2_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI31_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI31_0)
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
  ret <2 x float> <float 0.5, float 1.0>
}

define <2 x float> @vid_step2_v2f32() {
; CHECK-LABEL: vid_step2_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vfcvt.f.x.v v8, v8
; CHECK-NEXT: ret
  ret <2 x float> <float 0.0, float 2.0>
}

define <2 x double> @vid_v2f64() {
; CHECK-LABEL: vid_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vfcvt.f.x.v v8, v8
; CHECK-NEXT: ret
  ret <2 x double> <double 0.0, double 1.0>
}

define <2 x double> @vid_addend1_v2f64() {
; CHECK-LABEL: vid_addend1_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: vfcvt.f.x.v v8, v8
; CHECK-NEXT: ret
  ret <2 x double> <double 1.0, double 2.0>
}

define <2 x double> @vid_denominator2_v2f64() {
; CHECK-LABEL: vid_denominator2_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI35_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI35_0)
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: ret
  ret <2 x double> <double 0.5, double 1.0>
}

define <2 x double> @vid_step2_v2f64() {
; CHECK-LABEL: vid_step2_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vfcvt.f.x.v v8, v8
; CHECK-NEXT: ret
  ret <2 x double> <double 0.0, double 2.0>
}