1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+sve | FileCheck %s
4 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
5 target triple = "aarch64-unknown-linux-gnu"
7 ; Make sure callers set up the arguments correctly - tests AArch64ISelLowering::LowerCALL
; foo1: caller-side argument setup. Passes a float, two <vscale x 8 x double>
; values (each reassembled from the four parts of an ld4 result) and one
; <vscale x 2 x double> to callee1. In the expected assembly the first wide
; value travels in z1-z4, the second is stored to a 4 x VL stack area whose
; address is handed over in x0, and the single vector travels in z5.
9 define float @foo1(ptr %x0, ptr %x1, ptr %x2) nounwind {
11 ; CHECK: // %bb.0: // %entry
12 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
13 ; CHECK-NEXT: addvl sp, sp, #-4
14 ; CHECK-NEXT: ptrue p0.b
15 ; CHECK-NEXT: fmov s0, #1.00000000
16 ; CHECK-NEXT: ld4d { z1.d - z4.d }, p0/z, [x0]
17 ; CHECK-NEXT: mov x0, sp
18 ; CHECK-NEXT: ld4d { z16.d - z19.d }, p0/z, [x1]
19 ; CHECK-NEXT: ld1d { z5.d }, p0/z, [x2]
20 ; CHECK-NEXT: ptrue p0.d
21 ; CHECK-NEXT: st1d { z19.d }, p0, [sp, #3, mul vl]
22 ; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl]
23 ; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl]
24 ; CHECK-NEXT: st1d { z16.d }, p0, [sp]
25 ; CHECK-NEXT: bl callee1
26 ; CHECK-NEXT: addvl sp, sp, #4
27 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
30 %0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
31 %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
32 %2 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x0)
33 %3 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x1)
34 %4 = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> %1, ptr %x2)
; Reassemble the four parts of %2 into a single <vscale x 8 x double>.
35 %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 0
36 %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 1
37 %7 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 2
38 %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 3
39 %9 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> poison, <vscale x 2 x double> %5, i64 0)
40 %10 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %9, <vscale x 2 x double> %6, i64 2)
41 %11 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %10, <vscale x 2 x double> %7, i64 4)
42 %12 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %11, <vscale x 2 x double> %8, i64 6)
; Same reassembly for %3. The chain starts from poison like the chain above;
; the four inserts at offsets 0, 2, 4 and 6 overwrite every lane of the base.
43 %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 0
44 %14 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 1
45 %15 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 2
46 %16 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 3
47 %17 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> poison, <vscale x 2 x double> %13, i64 0)
48 %18 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %17, <vscale x 2 x double> %14, i64 2)
49 %19 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %18, <vscale x 2 x double> %15, i64 4)
50 %20 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %19, <vscale x 2 x double> %16, i64 6)
51 %call = call float @callee1(float 1.000000e+00, <vscale x 8 x double> %12, <vscale x 8 x double> %20, <vscale x 2 x double> %4)
; foo2: caller-side argument setup when x0-x7 are all taken by integer
; arguments. Passes i32 constants 0..7, a float and two
; <vscale x 8 x double> values to callee2. In the expected assembly the
; integers go in w0-w7, the float in s0 and the first wide value in z1-z4;
; the second is stored to a 4 x VL area above a 16-byte fixed slot, and the
; address of that area is written to [sp].
55 define float @foo2(ptr %x0, ptr %x1) nounwind {
57 ; CHECK: // %bb.0: // %entry
58 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
59 ; CHECK-NEXT: addvl sp, sp, #-4
60 ; CHECK-NEXT: sub sp, sp, #16
61 ; CHECK-NEXT: ptrue p0.b
62 ; CHECK-NEXT: fmov s0, #1.00000000
63 ; CHECK-NEXT: add x8, sp, #16
64 ; CHECK-NEXT: add x9, sp, #16
65 ; CHECK-NEXT: mov w2, #2 // =0x2
66 ; CHECK-NEXT: mov w3, #3 // =0x3
67 ; CHECK-NEXT: mov w4, #4 // =0x4
68 ; CHECK-NEXT: mov w5, #5 // =0x5
69 ; CHECK-NEXT: mov w6, #6 // =0x6
70 ; CHECK-NEXT: mov w7, #7 // =0x7
71 ; CHECK-NEXT: ld4d { z1.d - z4.d }, p0/z, [x0]
72 ; CHECK-NEXT: mov w0, wzr
73 ; CHECK-NEXT: ld4d { z16.d - z19.d }, p0/z, [x1]
74 ; CHECK-NEXT: ptrue p0.d
75 ; CHECK-NEXT: mov w1, #1 // =0x1
76 ; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl]
77 ; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl]
78 ; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl]
79 ; CHECK-NEXT: st1d { z16.d }, p0, [x9]
80 ; CHECK-NEXT: str x8, [sp]
81 ; CHECK-NEXT: bl callee2
82 ; CHECK-NEXT: addvl sp, sp, #4
83 ; CHECK-NEXT: add sp, sp, #16
84 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
87 %0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
88 %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
89 %2 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x0)
90 %3 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x1)
; Reassemble the four parts of %2 into a single <vscale x 8 x double>.
91 %4 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 0
92 %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 1
93 %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 2
94 %7 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 3
95 %8 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> poison, <vscale x 2 x double> %4, i64 0)
96 %9 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %8, <vscale x 2 x double> %5, i64 2)
97 %10 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %9, <vscale x 2 x double> %6, i64 4)
98 %11 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %10, <vscale x 2 x double> %7, i64 6)
; Same reassembly for %3.
99 %12 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 0
100 %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 1
101 %14 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 2
102 %15 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 3
103 %16 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> poison, <vscale x 2 x double> %12, i64 0)
104 %17 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %16, <vscale x 2 x double> %13, i64 2)
105 %18 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %17, <vscale x 2 x double> %14, i64 4)
106 %19 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %18, <vscale x 2 x double> %15, i64 6)
107 %call = call float @callee2(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, float 1.000000e+00, <vscale x 8 x double> %11, <vscale x 8 x double> %19)
; foo3: caller-side argument setup with a three-part wide value. Passes two
; floats, a <vscale x 8 x double> (from ld4), a <vscale x 6 x double> (from
; ld3) and a <vscale x 2 x double> to callee3. In the expected assembly the
; floats go in s0/s1, the 8-element value in z2-z5 and the single vector in
; z6; the 6-element value is stored to a 3 x VL stack area whose address is
; passed in x0.
111 define float @foo3(ptr %x0, ptr %x1, ptr %x2) nounwind {
113 ; CHECK: // %bb.0: // %entry
114 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
115 ; CHECK-NEXT: addvl sp, sp, #-3
116 ; CHECK-NEXT: ptrue p0.b
117 ; CHECK-NEXT: fmov s0, #1.00000000
118 ; CHECK-NEXT: fmov s1, #2.00000000
119 ; CHECK-NEXT: ld4d { z2.d - z5.d }, p0/z, [x0]
120 ; CHECK-NEXT: mov x0, sp
121 ; CHECK-NEXT: ld3d { z16.d - z18.d }, p0/z, [x1]
122 ; CHECK-NEXT: ld1d { z6.d }, p0/z, [x2]
123 ; CHECK-NEXT: ptrue p0.d
124 ; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl]
125 ; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl]
126 ; CHECK-NEXT: st1d { z16.d }, p0, [sp]
127 ; CHECK-NEXT: bl callee3
128 ; CHECK-NEXT: addvl sp, sp, #3
129 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
132 %0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
133 %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
134 %2 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x0)
135 %3 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x1)
136 %4 = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> %1, ptr %x2)
; Reassemble the four parts of %2 into a single <vscale x 8 x double>.
137 %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 0
138 %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 1
139 %7 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 2
140 %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 3
141 %9 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> poison, <vscale x 2 x double> %5, i64 0)
142 %10 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %9, <vscale x 2 x double> %6, i64 2)
143 %11 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %10, <vscale x 2 x double> %7, i64 4)
144 %12 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %11, <vscale x 2 x double> %8, i64 6)
; Reassemble the three parts of %3 into a single <vscale x 6 x double>.
145 %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} %3, 0
146 %14 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 1
147 %15 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 2
148 %16 = call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nx2f64(<vscale x 6 x double> poison, <vscale x 2 x double> %13, i64 0)
149 %17 = call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nx2f64(<vscale x 6 x double> %16 , <vscale x 2 x double> %14, i64 2)
150 %18 = call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nx2f64(<vscale x 6 x double> %17 , <vscale x 2 x double> %15, i64 4)
151 %call = call float @callee3(float 1.000000e+00, float 2.000000e+00, <vscale x 8 x double> %12, <vscale x 6 x double> %18, <vscale x 2 x double> %4)
155 ; Make sure callees read the arguments correctly - tests AArch64ISelLowering::LowerFormalArguments
; foo4: callee-side counterpart of the foo1 argument layout. Receives a
; double, three pointers, two <vscale x 8 x double> values and one
; <vscale x 2 x double>, and stores each vector argument (volatile) through
; its pointer. In the expected assembly %x1 arrives in z1-z4, %x3 in z5, and
; %x2 is loaded from the memory addressed by x3.
157 define double @foo4(double %x0, ptr %ptr1, ptr %ptr2, ptr %ptr3, <vscale x 8 x double> %x1, <vscale x 8 x double> %x2, <vscale x 2 x double> %x3) nounwind {
159 ; CHECK: // %bb.0: // %entry
160 ; CHECK-NEXT: ptrue p0.d
161 ; CHECK-NEXT: ld1d { z6.d }, p0/z, [x3, #1, mul vl]
162 ; CHECK-NEXT: ld1d { z7.d }, p0/z, [x3]
163 ; CHECK-NEXT: ld1d { z24.d }, p0/z, [x3, #3, mul vl]
164 ; CHECK-NEXT: ld1d { z25.d }, p0/z, [x3, #2, mul vl]
165 ; CHECK-NEXT: st1d { z4.d }, p0, [x0, #3, mul vl]
166 ; CHECK-NEXT: st1d { z3.d }, p0, [x0, #2, mul vl]
167 ; CHECK-NEXT: st1d { z2.d }, p0, [x0, #1, mul vl]
168 ; CHECK-NEXT: st1d { z1.d }, p0, [x0]
169 ; CHECK-NEXT: st1d { z25.d }, p0, [x1, #2, mul vl]
170 ; CHECK-NEXT: st1d { z24.d }, p0, [x1, #3, mul vl]
171 ; CHECK-NEXT: st1d { z7.d }, p0, [x1]
172 ; CHECK-NEXT: st1d { z6.d }, p0, [x1, #1, mul vl]
173 ; CHECK-NEXT: st1d { z5.d }, p0, [x2]
; Volatile stores keep every incoming vector argument live in the output.
; Pointer operands use the opaque `ptr` spelling, matching the signature.
176 store volatile <vscale x 8 x double> %x1, ptr %ptr1
177 store volatile <vscale x 8 x double> %x2, ptr %ptr2
178 store volatile <vscale x 2 x double> %x3, ptr %ptr3
; foo5: callee-side argument reads when a wide vector's address comes from
; the stack. Receives six i32 values, two pointers, a double and two
; <vscale x 8 x double> values, and stores both vectors (volatile). In the
; expected assembly the pointers are in x6/x7, %x1 arrives in z1-z4, and %x2
; is loaded through an address fetched from [sp].
182 define double @foo5(i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, ptr %ptr1, ptr %ptr2, double %x0, <vscale x 8 x double> %x1, <vscale x 8 x double> %x2) nounwind {
184 ; CHECK: // %bb.0: // %entry
185 ; CHECK-NEXT: ptrue p0.d
186 ; CHECK-NEXT: ldr x8, [sp]
187 ; CHECK-NEXT: ld1d { z5.d }, p0/z, [x8, #1, mul vl]
188 ; CHECK-NEXT: ld1d { z6.d }, p0/z, [x8]
189 ; CHECK-NEXT: ld1d { z7.d }, p0/z, [x8, #3, mul vl]
190 ; CHECK-NEXT: ld1d { z24.d }, p0/z, [x8, #2, mul vl]
191 ; CHECK-NEXT: st1d { z4.d }, p0, [x6, #3, mul vl]
192 ; CHECK-NEXT: st1d { z3.d }, p0, [x6, #2, mul vl]
193 ; CHECK-NEXT: st1d { z2.d }, p0, [x6, #1, mul vl]
194 ; CHECK-NEXT: st1d { z1.d }, p0, [x6]
195 ; CHECK-NEXT: st1d { z24.d }, p0, [x7, #2, mul vl]
196 ; CHECK-NEXT: st1d { z7.d }, p0, [x7, #3, mul vl]
197 ; CHECK-NEXT: st1d { z6.d }, p0, [x7]
198 ; CHECK-NEXT: st1d { z5.d }, p0, [x7, #1, mul vl]
; Volatile stores keep both incoming vector arguments live in the output.
; Pointer operands use the opaque `ptr` spelling, matching the signature.
201 store volatile <vscale x 8 x double> %x1, ptr %ptr1
202 store volatile <vscale x 8 x double> %x2, ptr %ptr2
; foo6: callee-side counterpart of the foo3 argument layout. Receives two
; doubles, two pointers, a <vscale x 8 x double> and a
; <vscale x 6 x double>, and stores both vectors (volatile). In the expected
; assembly %x2 arrives in z2-z5 and the three parts of %x3 are loaded from
; the memory addressed by x2.
206 define double @foo6(double %x0, double %x1, ptr %ptr1, ptr %ptr2, <vscale x 8 x double> %x2, <vscale x 6 x double> %x3) nounwind {
208 ; CHECK: // %bb.0: // %entry
209 ; CHECK-NEXT: ptrue p0.d
210 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x2]
211 ; CHECK-NEXT: ld1d { z6.d }, p0/z, [x2, #2, mul vl]
212 ; CHECK-NEXT: ld1d { z7.d }, p0/z, [x2, #1, mul vl]
213 ; CHECK-NEXT: st1d { z5.d }, p0, [x0, #3, mul vl]
214 ; CHECK-NEXT: st1d { z4.d }, p0, [x0, #2, mul vl]
215 ; CHECK-NEXT: st1d { z3.d }, p0, [x0, #1, mul vl]
216 ; CHECK-NEXT: st1d { z2.d }, p0, [x0]
217 ; CHECK-NEXT: st1d { z7.d }, p0, [x1, #1, mul vl]
218 ; CHECK-NEXT: st1d { z6.d }, p0, [x1, #2, mul vl]
219 ; CHECK-NEXT: st1d { z1.d }, p0, [x1]
; Volatile stores keep both incoming vector arguments live in the output.
; Pointer operands use the opaque `ptr` spelling, matching the signature.
222 store volatile <vscale x 8 x double> %x2, ptr %ptr1
223 store volatile <vscale x 6 x double> %x3, ptr %ptr2
227 ; Use AAVPCS, SVE register in z0 - z7 used
; aavpcs1: seven i32 arguments followed by ten <vscale x 4 x i32> vectors
; and a pointer. In the expected assembly %s7-%s14 arrive in z0-z7, %s15 is
; loaded through x7, and %s16 is loaded through the first of two addresses
; read from [sp]; the second address is the store destination. %s10 is never
; stored, and its register z3 is reused to hold %s16.
229 define void @aavpcs1(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, <vscale x 4 x i32> %s7, <vscale x 4 x i32> %s8, <vscale x 4 x i32> %s9, <vscale x 4 x i32> %s10, <vscale x 4 x i32> %s11, <vscale x 4 x i32> %s12, <vscale x 4 x i32> %s13, <vscale x 4 x i32> %s14, <vscale x 4 x i32> %s15, <vscale x 4 x i32> %s16, ptr %ptr) nounwind {
230 ; CHECK-LABEL: aavpcs1:
231 ; CHECK: // %bb.0: // %entry
232 ; CHECK-NEXT: ptrue p0.s
233 ; CHECK-NEXT: ldp x8, x9, [sp]
234 ; CHECK-NEXT: ld1w { z3.s }, p0/z, [x8]
235 ; CHECK-NEXT: ld1w { z24.s }, p0/z, [x7]
236 ; CHECK-NEXT: st1w { z0.s }, p0, [x9]
237 ; CHECK-NEXT: st1w { z1.s }, p0, [x9]
238 ; CHECK-NEXT: st1w { z2.s }, p0, [x9]
239 ; CHECK-NEXT: st1w { z4.s }, p0, [x9]
240 ; CHECK-NEXT: st1w { z5.s }, p0, [x9]
241 ; CHECK-NEXT: st1w { z6.s }, p0, [x9]
242 ; CHECK-NEXT: st1w { z7.s }, p0, [x9]
243 ; CHECK-NEXT: st1w { z24.s }, p0, [x9]
244 ; CHECK-NEXT: st1w { z3.s }, p0, [x9]
; Volatile stores keep the vector arguments live; %s10 is skipped.
; Pointer operands use the opaque `ptr` spelling, matching the signature.
247 store volatile <vscale x 4 x i32> %s7, ptr %ptr
248 store volatile <vscale x 4 x i32> %s8, ptr %ptr
249 store volatile <vscale x 4 x i32> %s9, ptr %ptr
250 store volatile <vscale x 4 x i32> %s11, ptr %ptr
251 store volatile <vscale x 4 x i32> %s12, ptr %ptr
252 store volatile <vscale x 4 x i32> %s13, ptr %ptr
253 store volatile <vscale x 4 x i32> %s14, ptr %ptr
254 store volatile <vscale x 4 x i32> %s15, ptr %ptr
255 store volatile <vscale x 4 x i32> %s16, ptr %ptr
259 ; Use AAVPCS, SVE register in z0 - z7 used
; aavpcs2: seven float arguments followed by ten <vscale x 4 x float>
; vectors and a pointer. In the expected assembly %s7 arrives in z7; the
; remaining stored vectors are loaded through x0, x1, x3-x7 and through the
; first of two addresses read from [sp], whose second address is the store
; destination. %s10 is never stored, and no load through x2 is expected.
261 define void @aavpcs2(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, <vscale x 4 x float> %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12,<vscale x 4 x float> %s13,<vscale x 4 x float> %s14,<vscale x 4 x float> %s15,<vscale x 4 x float> %s16,ptr %ptr) nounwind {
262 ; CHECK-LABEL: aavpcs2:
263 ; CHECK: // %bb.0: // %entry
264 ; CHECK-NEXT: ptrue p0.s
265 ; CHECK-NEXT: ldp x8, x9, [sp]
266 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
267 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x7]
268 ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x6]
269 ; CHECK-NEXT: ld1w { z3.s }, p0/z, [x5]
270 ; CHECK-NEXT: ld1w { z4.s }, p0/z, [x4]
271 ; CHECK-NEXT: ld1w { z5.s }, p0/z, [x3]
272 ; CHECK-NEXT: ld1w { z6.s }, p0/z, [x1]
273 ; CHECK-NEXT: ld1w { z24.s }, p0/z, [x0]
274 ; CHECK-NEXT: st1w { z7.s }, p0, [x9]
275 ; CHECK-NEXT: st1w { z24.s }, p0, [x9]
276 ; CHECK-NEXT: st1w { z6.s }, p0, [x9]
277 ; CHECK-NEXT: st1w { z5.s }, p0, [x9]
278 ; CHECK-NEXT: st1w { z4.s }, p0, [x9]
279 ; CHECK-NEXT: st1w { z3.s }, p0, [x9]
280 ; CHECK-NEXT: st1w { z2.s }, p0, [x9]
281 ; CHECK-NEXT: st1w { z1.s }, p0, [x9]
282 ; CHECK-NEXT: st1w { z0.s }, p0, [x9]
; Volatile stores keep the vector arguments live; %s10 is skipped.
; Pointer operands use the opaque `ptr` spelling, matching the signature.
285 store volatile <vscale x 4 x float> %s7, ptr %ptr
286 store volatile <vscale x 4 x float> %s8, ptr %ptr
287 store volatile <vscale x 4 x float> %s9, ptr %ptr
288 store volatile <vscale x 4 x float> %s11, ptr %ptr
289 store volatile <vscale x 4 x float> %s12, ptr %ptr
290 store volatile <vscale x 4 x float> %s13, ptr %ptr
291 store volatile <vscale x 4 x float> %s14, ptr %ptr
292 store volatile <vscale x 4 x float> %s15, ptr %ptr
293 store volatile <vscale x 4 x float> %s16, ptr %ptr
297 ; Use AAVPCS, no SVE register in z0 - z7 used (floats occupy z0 - z7) but predicate arg is used
; aavpcs3: eight float arguments, ten <vscale x 4 x float> vectors, a
; predicate and a pointer. In the expected assembly %s8-%s15 are loaded
; through x0-x7, %s16 through an address read from [sp], and the store
; destination is read from [sp, #16]. %s17 and %p0 are not referenced by the
; stores visible here.
299 define void @aavpcs3(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, <vscale x 16 x i1> %p0, ptr %ptr) nounwind {
300 ; CHECK-LABEL: aavpcs3:
301 ; CHECK: // %bb.0: // %entry
302 ; CHECK-NEXT: ptrue p0.s
303 ; CHECK-NEXT: ldr x8, [sp]
304 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
305 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x7]
306 ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x6]
307 ; CHECK-NEXT: ld1w { z3.s }, p0/z, [x5]
308 ; CHECK-NEXT: ld1w { z4.s }, p0/z, [x4]
309 ; CHECK-NEXT: ld1w { z5.s }, p0/z, [x3]
310 ; CHECK-NEXT: ld1w { z6.s }, p0/z, [x2]
311 ; CHECK-NEXT: ld1w { z7.s }, p0/z, [x1]
312 ; CHECK-NEXT: ld1w { z24.s }, p0/z, [x0]
313 ; CHECK-NEXT: ldr x8, [sp, #16]
314 ; CHECK-NEXT: st1w { z24.s }, p0, [x8]
315 ; CHECK-NEXT: st1w { z7.s }, p0, [x8]
316 ; CHECK-NEXT: st1w { z6.s }, p0, [x8]
317 ; CHECK-NEXT: st1w { z5.s }, p0, [x8]
318 ; CHECK-NEXT: st1w { z4.s }, p0, [x8]
319 ; CHECK-NEXT: st1w { z3.s }, p0, [x8]
320 ; CHECK-NEXT: st1w { z2.s }, p0, [x8]
321 ; CHECK-NEXT: st1w { z1.s }, p0, [x8]
322 ; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; Volatile stores keep %s8-%s16 live in the output.
; Pointer operands use the opaque `ptr` spelling, matching the signature.
325 store volatile <vscale x 4 x float> %s8, ptr %ptr
326 store volatile <vscale x 4 x float> %s9, ptr %ptr
327 store volatile <vscale x 4 x float> %s10, ptr %ptr
328 store volatile <vscale x 4 x float> %s11, ptr %ptr
329 store volatile <vscale x 4 x float> %s12, ptr %ptr
330 store volatile <vscale x 4 x float> %s13, ptr %ptr
331 store volatile <vscale x 4 x float> %s14, ptr %ptr
332 store volatile <vscale x 4 x float> %s15, ptr %ptr
333 store volatile <vscale x 4 x float> %s16, ptr %ptr
337 ; Use AAVPCS, SVE register in z0 - z7 used (i32s don't occupy z0 - z7)
; aavpcs4: eight i32 arguments, ten <vscale x 4 x i32> vectors and a
; pointer. In the expected assembly %s8-%s15 arrive in z0-z7, %s16 is loaded
; through an address read from [sp], and the store destination is read from
; [sp, #16]. %s17 is not referenced by the stores visible here.
339 define void @aavpcs4(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, i32 %s7, <vscale x 4 x i32> %s8, <vscale x 4 x i32> %s9, <vscale x 4 x i32> %s10, <vscale x 4 x i32> %s11, <vscale x 4 x i32> %s12, <vscale x 4 x i32> %s13, <vscale x 4 x i32> %s14, <vscale x 4 x i32> %s15, <vscale x 4 x i32> %s16, <vscale x 4 x i32> %s17, ptr %ptr) nounwind {
340 ; CHECK-LABEL: aavpcs4:
341 ; CHECK: // %bb.0: // %entry
342 ; CHECK-NEXT: ptrue p0.s
343 ; CHECK-NEXT: ldr x8, [sp]
344 ; CHECK-NEXT: ldr x9, [sp, #16]
345 ; CHECK-NEXT: ld1w { z24.s }, p0/z, [x8]
346 ; CHECK-NEXT: st1w { z0.s }, p0, [x9]
347 ; CHECK-NEXT: st1w { z1.s }, p0, [x9]
348 ; CHECK-NEXT: st1w { z2.s }, p0, [x9]
349 ; CHECK-NEXT: st1w { z3.s }, p0, [x9]
350 ; CHECK-NEXT: st1w { z4.s }, p0, [x9]
351 ; CHECK-NEXT: st1w { z5.s }, p0, [x9]
352 ; CHECK-NEXT: st1w { z6.s }, p0, [x9]
353 ; CHECK-NEXT: st1w { z7.s }, p0, [x9]
354 ; CHECK-NEXT: st1w { z24.s }, p0, [x9]
; Volatile stores keep %s8-%s16 live in the output.
; Pointer operands use the opaque `ptr` spelling, matching the signature.
357 store volatile <vscale x 4 x i32> %s8, ptr %ptr
358 store volatile <vscale x 4 x i32> %s9, ptr %ptr
359 store volatile <vscale x 4 x i32> %s10, ptr %ptr
360 store volatile <vscale x 4 x i32> %s11, ptr %ptr
361 store volatile <vscale x 4 x i32> %s12, ptr %ptr
362 store volatile <vscale x 4 x i32> %s13, ptr %ptr
363 store volatile <vscale x 4 x i32> %s14, ptr %ptr
364 store volatile <vscale x 4 x i32> %s15, ptr %ptr
365 store volatile <vscale x 4 x i32> %s16, ptr %ptr
369 ; Use AAVPCS, SVE register used in return
; aavpcs5: same argument list as aavpcs3 minus the predicate, but returns a
; <vscale x 4 x float>. In the expected assembly %s8-%s15 are loaded through
; x0-x7 and %s16 through an address read from [sp]; %s8 is loaded straight
; into z0, which is also the register left holding the returned value.
371 define <vscale x 4 x float> @aavpcs5(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, ptr %ptr) nounwind {
372 ; CHECK-LABEL: aavpcs5:
373 ; CHECK: // %bb.0: // %entry
374 ; CHECK-NEXT: ptrue p0.s
375 ; CHECK-NEXT: ldr x8, [sp]
376 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8]
377 ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x7]
378 ; CHECK-NEXT: ld1w { z3.s }, p0/z, [x6]
379 ; CHECK-NEXT: ld1w { z4.s }, p0/z, [x5]
380 ; CHECK-NEXT: ld1w { z5.s }, p0/z, [x4]
381 ; CHECK-NEXT: ld1w { z6.s }, p0/z, [x3]
382 ; CHECK-NEXT: ld1w { z7.s }, p0/z, [x2]
383 ; CHECK-NEXT: ld1w { z24.s }, p0/z, [x1]
384 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
385 ; CHECK-NEXT: ldr x8, [sp, #16]
386 ; CHECK-NEXT: st1w { z0.s }, p0, [x8]
387 ; CHECK-NEXT: st1w { z24.s }, p0, [x8]
388 ; CHECK-NEXT: st1w { z7.s }, p0, [x8]
389 ; CHECK-NEXT: st1w { z6.s }, p0, [x8]
390 ; CHECK-NEXT: st1w { z5.s }, p0, [x8]
391 ; CHECK-NEXT: st1w { z4.s }, p0, [x8]
392 ; CHECK-NEXT: st1w { z3.s }, p0, [x8]
393 ; CHECK-NEXT: st1w { z2.s }, p0, [x8]
394 ; CHECK-NEXT: st1w { z1.s }, p0, [x8]
; Volatile stores keep %s8-%s16 live in the output.
; Pointer operands use the opaque `ptr` spelling, matching the signature.
397 store volatile <vscale x 4 x float> %s8, ptr %ptr
398 store volatile <vscale x 4 x float> %s9, ptr %ptr
399 store volatile <vscale x 4 x float> %s10, ptr %ptr
400 store volatile <vscale x 4 x float> %s11, ptr %ptr
401 store volatile <vscale x 4 x float> %s12, ptr %ptr
402 store volatile <vscale x 4 x float> %s13, ptr %ptr
403 store volatile <vscale x 4 x float> %s14, ptr %ptr
404 store volatile <vscale x 4 x float> %s15, ptr %ptr
405 store volatile <vscale x 4 x float> %s16, ptr %ptr
406 ret <vscale x 4 x float> %s8
; aapcs1: same argument list as aavpcs5 but returning void. In the expected
; assembly %s8-%s15 are loaded through x0-x7 and %s16 through an address
; read from [sp]. The extra scratch register here is z16, where the aavpcs
; tests use z24 (presumably because this function stays on the base calling
; convention - confirm against the AArch64 backend).
409 define void @aapcs1(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, ptr %ptr) nounwind {
410 ; CHECK-LABEL: aapcs1:
411 ; CHECK: // %bb.0: // %entry
412 ; CHECK-NEXT: ptrue p0.s
413 ; CHECK-NEXT: ldr x8, [sp]
414 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
415 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x7]
416 ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x6]
417 ; CHECK-NEXT: ld1w { z3.s }, p0/z, [x5]
418 ; CHECK-NEXT: ld1w { z4.s }, p0/z, [x4]
419 ; CHECK-NEXT: ld1w { z5.s }, p0/z, [x3]
420 ; CHECK-NEXT: ld1w { z6.s }, p0/z, [x2]
421 ; CHECK-NEXT: ld1w { z7.s }, p0/z, [x1]
422 ; CHECK-NEXT: ld1w { z16.s }, p0/z, [x0]
423 ; CHECK-NEXT: ldr x8, [sp, #16]
424 ; CHECK-NEXT: st1w { z16.s }, p0, [x8]
425 ; CHECK-NEXT: st1w { z7.s }, p0, [x8]
426 ; CHECK-NEXT: st1w { z6.s }, p0, [x8]
427 ; CHECK-NEXT: st1w { z5.s }, p0, [x8]
428 ; CHECK-NEXT: st1w { z4.s }, p0, [x8]
429 ; CHECK-NEXT: st1w { z3.s }, p0, [x8]
430 ; CHECK-NEXT: st1w { z2.s }, p0, [x8]
431 ; CHECK-NEXT: st1w { z1.s }, p0, [x8]
432 ; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; Volatile stores keep %s8-%s16 live in the output.
; Pointer operands use the opaque `ptr` spelling, matching the signature.
435 store volatile <vscale x 4 x float> %s8, ptr %ptr
436 store volatile <vscale x 4 x float> %s9, ptr %ptr
437 store volatile <vscale x 4 x float> %s10, ptr %ptr
438 store volatile <vscale x 4 x float> %s11, ptr %ptr
439 store volatile <vscale x 4 x float> %s12, ptr %ptr
440 store volatile <vscale x 4 x float> %s13, ptr %ptr
441 store volatile <vscale x 4 x float> %s14, ptr %ptr
442 store volatile <vscale x 4 x float> %s15, ptr %ptr
443 store volatile <vscale x 4 x float> %s16, ptr %ptr
; External callee used by the *_high_range tests below: eight float
; arguments followed by two <vscale x 4 x float> arguments.
447 declare void @non_sve_callee_high_range(float %f0, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6, float %f7, <vscale x 4 x float> %v0, <vscale x 4 x float> %v1)
; Caller with no arguments of its own that calls non_sve_callee_high_range
; with float constants 0.0-7.0 and two undef vectors. In the expected
; assembly the caller reserves 2 x VL of stack and passes the two slot
; addresses in x0 (sp + 1 x VL) and x1 (sp); no vector stores are expected
; because both vector operands are undef.
449 define void @non_sve_caller_non_sve_callee_high_range() {
450 ; CHECK-LABEL: non_sve_caller_non_sve_callee_high_range:
452 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
453 ; CHECK-NEXT: addvl sp, sp, #-2
454 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
455 ; CHECK-NEXT: .cfi_offset w30, -8
456 ; CHECK-NEXT: .cfi_offset w29, -16
457 ; CHECK-NEXT: movi d0, #0000000000000000
458 ; CHECK-NEXT: fmov s1, #1.00000000
459 ; CHECK-NEXT: addvl x0, sp, #1
460 ; CHECK-NEXT: fmov s2, #2.00000000
461 ; CHECK-NEXT: fmov s3, #3.00000000
462 ; CHECK-NEXT: mov x1, sp
463 ; CHECK-NEXT: fmov s4, #4.00000000
464 ; CHECK-NEXT: fmov s5, #5.00000000
465 ; CHECK-NEXT: fmov s6, #6.00000000
466 ; CHECK-NEXT: fmov s7, #7.00000000
467 ; CHECK-NEXT: bl non_sve_callee_high_range
468 ; CHECK-NEXT: addvl sp, sp, #2
469 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
471 call void @non_sve_callee_high_range(float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
; Caller that itself takes eight floats plus two <vscale x 4 x float>
; vectors and forwards the vectors to non_sve_callee_high_range alongside
; float constants 0.0-7.0. In the expected assembly the incoming vectors are
; loaded through x0 and x1, copied into a fresh 2 x VL stack area, and the
; new slot addresses are passed in x0 (sp + 1 x VL) and x1 (sp).
475 define void @non_sve_caller_high_range_non_sve_callee_high_range(float %f0, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6, float %f7, <vscale x 4 x float> %v0, <vscale x 4 x float> %v1) {
476 ; CHECK-LABEL: non_sve_caller_high_range_non_sve_callee_high_range:
478 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
479 ; CHECK-NEXT: addvl sp, sp, #-2
480 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
481 ; CHECK-NEXT: .cfi_offset w30, -8
482 ; CHECK-NEXT: .cfi_offset w29, -16
483 ; CHECK-NEXT: ptrue p0.s
484 ; CHECK-NEXT: movi d0, #0000000000000000
485 ; CHECK-NEXT: fmov s1, #1.00000000
486 ; CHECK-NEXT: fmov s2, #2.00000000
487 ; CHECK-NEXT: fmov s3, #3.00000000
488 ; CHECK-NEXT: fmov s4, #4.00000000
489 ; CHECK-NEXT: fmov s5, #5.00000000
490 ; CHECK-NEXT: fmov s6, #6.00000000
491 ; CHECK-NEXT: fmov s7, #7.00000000
492 ; CHECK-NEXT: ld1w { z16.s }, p0/z, [x0]
493 ; CHECK-NEXT: ld1w { z17.s }, p0/z, [x1]
494 ; CHECK-NEXT: addvl x0, sp, #1
495 ; CHECK-NEXT: mov x1, sp
496 ; CHECK-NEXT: st1w { z17.s }, p0, [sp]
497 ; CHECK-NEXT: st1w { z16.s }, p0, [sp, #1, mul vl]
498 ; CHECK-NEXT: bl non_sve_callee_high_range
499 ; CHECK-NEXT: addvl sp, sp, #2
500 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
502 call void @non_sve_callee_high_range(float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, <vscale x 4 x float> %v0, <vscale x 4 x float> %v1)
506 define <vscale x 4 x float> @sve_caller_non_sve_callee_high_range(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1) {
507 ; CHECK-LABEL: sve_caller_non_sve_callee_high_range:
509 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
510 ; CHECK-NEXT: addvl sp, sp, #-18
511 ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
512 ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
513 ; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
514 ; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
515 ; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
516 ; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
517 ; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
518 ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
519 ; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
520 ; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
521 ; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
522 ; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
523 ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
524 ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
525 ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
526 ; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
527 ; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
528 ; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
529 ; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
530 ; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
531 ; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
532 ; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
533 ; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
534 ; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
535 ; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
536 ; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
537 ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
538 ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
539 ; CHECK-NEXT: addvl sp, sp, #-3
540 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa8, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 168 * VG
541 ; CHECK-NEXT: .cfi_offset w30, -8
542 ; CHECK-NEXT: .cfi_offset w29, -16
543 ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
544 ; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
545 ; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG
546 ; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG
547 ; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG
548 ; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG
549 ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG
550 ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
551 ; CHECK-NEXT: ptrue p0.s
552 ; CHECK-NEXT: mov z25.d, z0.d
553 ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
554 ; CHECK-NEXT: movi d0, #0000000000000000
555 ; CHECK-NEXT: mov z24.d, z1.d
556 ; CHECK-NEXT: addvl x0, sp, #2
557 ; CHECK-NEXT: fmov s1, #1.00000000
558 ; CHECK-NEXT: fmov s2, #2.00000000
559 ; CHECK-NEXT: addvl x1, sp, #1
560 ; CHECK-NEXT: fmov s3, #3.00000000
561 ; CHECK-NEXT: fmov s4, #4.00000000
562 ; CHECK-NEXT: fmov s5, #5.00000000
563 ; CHECK-NEXT: fmov s6, #6.00000000
564 ; CHECK-NEXT: fmov s7, #7.00000000
565 ; CHECK-NEXT: st1w { z24.s }, p0, [sp, #1, mul vl]
566 ; CHECK-NEXT: st1w { z25.s }, p0, [sp, #2, mul vl]
567 ; CHECK-NEXT: bl non_sve_callee_high_range
568 ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
569 ; CHECK-NEXT: addvl sp, sp, #3
570 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
571 ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
572 ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
573 ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
574 ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
575 ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
576 ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
577 ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
578 ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
579 ; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
580 ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
581 ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
582 ; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
583 ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
584 ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
585 ; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
586 ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
587 ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
588 ; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
589 ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
590 ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
591 ; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
592 ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
593 ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
594 ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
595 ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
596 ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
597 ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
598 ; CHECK-NEXT: addvl sp, sp, #18
599 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
601 call void @non_sve_callee_high_range(float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, <vscale x 4 x float> %v0, <vscale x 4 x float> %v1)
602 ret <vscale x 4 x float> %v0
; Caller that returns a scalable vector and calls @non_sve_callee_high_range
; with eight float constants (0.0 - 7.0) followed by two undef
; <vscale x 4 x float> operands.
; The assertions below expect:
;   - p4-p15 and z8-z23 to be spilled before the call and reloaded after it;
;   - two vector-length-sized stack slots to be reserved (addvl sp, sp, #-2);
;   - x0 = sp + 1 * VL and x1 = sp, i.e. the addresses of those two slots;
;   - s0-s7 to hold the eight float constants;
;   - no stores into the two slots before the call.
; NOTE(review): the spills are presumably needed because the callee is not
; required to preserve these SVE registers - confirm against AAPCS64.
605 define <vscale x 4 x float> @sve_ret_caller_non_sve_callee_high_range() {
606 ; CHECK-LABEL: sve_ret_caller_non_sve_callee_high_range:
608 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
609 ; CHECK-NEXT: addvl sp, sp, #-18
610 ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
611 ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
612 ; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
613 ; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
614 ; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
615 ; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
616 ; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
617 ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
618 ; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
619 ; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
620 ; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
621 ; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
622 ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
623 ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
624 ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
625 ; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
626 ; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
627 ; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
628 ; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
629 ; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
630 ; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
631 ; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
632 ; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
633 ; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
634 ; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
635 ; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
636 ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
637 ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
638 ; CHECK-NEXT: addvl sp, sp, #-2
639 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 160 * VG
640 ; CHECK-NEXT: .cfi_offset w30, -8
641 ; CHECK-NEXT: .cfi_offset w29, -16
642 ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
643 ; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
644 ; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG
645 ; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG
646 ; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG
647 ; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG
648 ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG
649 ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
650 ; CHECK-NEXT: movi d0, #0000000000000000
651 ; CHECK-NEXT: fmov s1, #1.00000000
652 ; CHECK-NEXT: addvl x0, sp, #1
653 ; CHECK-NEXT: fmov s2, #2.00000000
654 ; CHECK-NEXT: fmov s3, #3.00000000
655 ; CHECK-NEXT: mov x1, sp
656 ; CHECK-NEXT: fmov s4, #4.00000000
657 ; CHECK-NEXT: fmov s5, #5.00000000
658 ; CHECK-NEXT: fmov s6, #6.00000000
659 ; CHECK-NEXT: fmov s7, #7.00000000
660 ; CHECK-NEXT: bl non_sve_callee_high_range
661 ; CHECK-NEXT: addvl sp, sp, #2
662 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
663 ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
664 ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
665 ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
666 ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
667 ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
668 ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
669 ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
670 ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
671 ; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
672 ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
673 ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
674 ; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
675 ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
676 ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
677 ; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
678 ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
679 ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
680 ; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
681 ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
682 ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
683 ; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
684 ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
685 ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
686 ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
687 ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
688 ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
689 ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
690 ; CHECK-NEXT: addvl sp, sp, #18
691 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; Both vector operands and the returned value are undef, so the expected code
; does not depend on any particular vector contents.
693 call void @non_sve_callee_high_range(float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
694 ret <vscale x 4 x float> undef
; Callee taking nine floats (%f0 - %f8) followed by one scalable vector (%v0).
; In the caller below the first eight floats are expected in s0-s7, which
; leaves %f8 and %v0 as the operands the callee name describes as passed via
; memory.
697 declare void @func_f8_and_v0_passed_via_memory(float %f0, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6, float %f7, float %f8, <vscale x 4 x float> %v0)
; Calls the function declared above with floats 0.0 - 8.0 and a splat of 9.0.
; The assertions below expect:
;   - the frame to reserve one vector length (addvl sp, sp, #-1) plus 16 bytes
;     (sub sp, sp, #16);
;   - 0.0 - 7.0 in s0-s7;
;   - 8.0 to be materialised in w8 as 0x41000000 (its IEEE-754 single-precision
;     encoding) and stored at [sp];
;   - the 9.0 splat to be built in z16 and stored with st1w at sp + 16, with x0
;     set to that same address.
698 define void @verify_all_operands_are_initialised() {
699 ; CHECK-LABEL: verify_all_operands_are_initialised:
701 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
702 ; CHECK-NEXT: addvl sp, sp, #-1
703 ; CHECK-NEXT: sub sp, sp, #16
704 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG
705 ; CHECK-NEXT: .cfi_offset w30, -8
706 ; CHECK-NEXT: .cfi_offset w29, -16
707 ; CHECK-NEXT: ptrue p0.s
708 ; CHECK-NEXT: movi d0, #0000000000000000
709 ; CHECK-NEXT: fmov s1, #1.00000000
710 ; CHECK-NEXT: fmov z16.s, #9.00000000
711 ; CHECK-NEXT: mov w8, #1090519040 // =0x41000000
712 ; CHECK-NEXT: add x0, sp, #16
713 ; CHECK-NEXT: fmov s2, #2.00000000
714 ; CHECK-NEXT: fmov s3, #3.00000000
715 ; CHECK-NEXT: add x9, sp, #16
716 ; CHECK-NEXT: fmov s4, #4.00000000
717 ; CHECK-NEXT: fmov s5, #5.00000000
718 ; CHECK-NEXT: fmov s6, #6.00000000
719 ; CHECK-NEXT: fmov s7, #7.00000000
720 ; CHECK-NEXT: st1w { z16.s }, p0, [x9]
721 ; CHECK-NEXT: str w8, [sp]
722 ; CHECK-NEXT: bl func_f8_and_v0_passed_via_memory
723 ; CHECK-NEXT: addvl sp, sp, #1
724 ; CHECK-NEXT: add sp, sp, #16
725 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; The last operand is a constant splat of 9.0 written as an
; insertelement + shufflevector constant expression.
727 call void @func_f8_and_v0_passed_via_memory(float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 9.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer))
; External callees with mixed scalar / scalable-vector signatures. Only
; declarations are provided; no bodies are defined for them here.
731 declare float @callee1(float, <vscale x 8 x double>, <vscale x 8 x double>, <vscale x 2 x double>)
732 declare float @callee2(i32, i32, i32, i32, i32, i32, i32, i32, float, <vscale x 8 x double>, <vscale x 8 x double>)
733 declare float @callee3(float, float, <vscale x 8 x double>, <vscale x 6 x double>, <vscale x 2 x double>)
; Intrinsic declarations: predicate construction/conversion, structured and
; plain SVE loads, a floating-point add reduction, and subvector insertion.
735 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg)
736 declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
737 declare {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1>, ptr)
738 declare {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1>, ptr)
739 declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1>, ptr)
740 declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
; NOTE(review): the final type suffix of the two declarations below is spelled
; "nx2f64", whereas the other declarations here use "nxv2f64" for
; <vscale x 2 x double>. Any rename would also have to be applied at every
; call site - confirm before changing.
741 declare <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double>, <vscale x 2 x double>, i64)
742 declare <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nx2f64(<vscale x 6 x double>, <vscale x 2 x double>, i64)