1 // RUN: %clang_cc1 -O3 -triple aarch64 -target-feature +sve -target-feature +sve2p1 -mvscale-min=1 -mvscale-max=1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-AAPCS
2 // RUN: %clang_cc1 -O3 -triple arm64-apple-ios7.0 -target-abi darwinpcs -target-feature +sve -target-feature +sve2p1 -mvscale-min=1 -mvscale-max=1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DARWIN
3 // RUN: %clang_cc1 -O3 -triple aarch64-linux-gnu -target-feature +sve -target-feature +sve2p1 -mvscale-min=1 -mvscale-max=1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-AAPCS
5 // REQUIRES: aarch64-registered-target
// Fixed-length aliases of the scalable SVE vector and predicate types, used
// as members of the aggregates under test. The 128-bit width corresponds to
// the vscale range of exactly 1 requested by the compiler invocations at the
// top of this file.
11 typedef svfloat32_t fvec32
__attribute__((arm_sve_vector_bits(128)));
12 typedef svfloat64_t fvec64
__attribute__((arm_sve_vector_bits(128)));
13 typedef svbool_t bvec
__attribute__((arm_sve_vector_bits(128)));
14 typedef svmfloat8_t mfvec8
__attribute__((arm_sve_vector_bits(128)));
24 // Pure Scalable Type, needs 4 Z-regs, 2 P-regs
33 // Pure Scalable Type, 1 Z-reg
38 // Big PST, does not fit in registers.
47 // A small aggregate type
52 // CHECK: %struct.PST = type { <2 x i8>, <2 x double>, [2 x <4 x float>], <16 x i8>, <2 x i8> }
54 // Test argument passing of Pure Scalable Types by examining the generated
55 // LLVM IR function declarations. A PST argument in C/C++ should map to:
56 // a) a `ptr` argument, if passed indirectly through memory
57 // b) a series of scalable vector arguments, if passed via registers
59 // Simple argument passing, PST expanded into registers.
66 void test_argpass_simple(PST
*p
) {
67 void argpass_simple_callee(PST
);
68 argpass_simple_callee(*p
);
70 // CHECK-AAPCS: define dso_local void @test_argpass_simple(ptr nocapture noundef readonly %p)
71 // CHECK-AAPCS-NEXT: entry:
72 // CHECK-AAPCS-NEXT: %0 = load <2 x i8>, ptr %p, align 16
73 // CHECK-AAPCS-NEXT: %cast.scalable = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> poison, <2 x i8> %0, i64 0)
74 // CHECK-AAPCS-NEXT: %1 = bitcast <vscale x 2 x i8> %cast.scalable to <vscale x 16 x i1>
75 // CHECK-AAPCS-NEXT: %2 = getelementptr inbounds nuw i8, ptr %p, i64 16
76 // CHECK-AAPCS-NEXT: %3 = load <2 x double>, ptr %2, align 16
77 // CHECK-AAPCS-NEXT: %cast.scalable1 = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> poison, <2 x double> %3, i64 0)
78 // CHECK-AAPCS-NEXT: %4 = getelementptr inbounds nuw i8, ptr %p, i64 32
79 // CHECK-AAPCS-NEXT: %5 = load <4 x float>, ptr %4, align 16
80 // CHECK-AAPCS-NEXT: %cast.scalable2 = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> %5, i64 0)
81 // CHECK-AAPCS-NEXT: %6 = getelementptr inbounds nuw i8, ptr %p, i64 48
82 // CHECK-AAPCS-NEXT: %7 = load <4 x float>, ptr %6, align 16
83 // CHECK-AAPCS-NEXT: %cast.scalable3 = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> %7, i64 0)
84 // CHECK-AAPCS-NEXT: %8 = getelementptr inbounds nuw i8, ptr %p, i64 64
85 // CHECK-AAPCS-NEXT: %9 = load <16 x i8>, ptr %8, align 16
86 // CHECK-AAPCS-NEXT: %cast.scalable4 = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> %9, i64 0)
87 // CHECK-AAPCS-NEXT: %10 = getelementptr inbounds nuw i8, ptr %p, i64 80
88 // CHECK-AAPCS-NEXT: %11 = load <2 x i8>, ptr %10, align 16
89 // CHECK-AAPCS-NEXT: %cast.scalable5 = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> poison, <2 x i8> %11, i64 0)
90 // CHECK-AAPCS-NEXT: %12 = bitcast <vscale x 2 x i8> %cast.scalable5 to <vscale x 16 x i1>
91 // CHECK-AAPCS-NEXT: tail call void @argpass_simple_callee(<vscale x 16 x i1> %1, <vscale x 2 x double> %cast.scalable1, <vscale x 4 x float> %cast.scalable2, <vscale x 4 x float> %cast.scalable3, <vscale x 16 x i8> %cast.scalable4, <vscale x 16 x i1> %12)
92 // CHECK-AAPCS-NEXT: ret void
94 // CHECK-AAPCS: declare void @argpass_simple_callee(<vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>)
95 // CHECK-DARWIN: declare void @argpass_simple_callee(ptr noundef)
97 // Boundary case of using the last available Z-reg, PST expanded.
105 void test_argpass_last_z(PST
*p
) {
106 void argpass_last_z_callee(double, double, double, double, PST
);
107 argpass_last_z_callee(.0, .0, .0, .0, *p
);
109 // CHECK-AAPCS: declare void @argpass_last_z_callee(double noundef, double noundef, double noundef, double noundef, <vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>)
110 // CHECK-DARWIN: declare void @argpass_last_z_callee(double noundef, double noundef, double noundef, double noundef, ptr noundef)
113 // Like the above, but using a tuple type to occupy some registers.
121 void test_argpass_last_z_tuple(PST
*p
, svfloat64x4_t x
) {
122 void argpass_last_z_tuple_callee(svfloat64x4_t
, PST
);
123 argpass_last_z_tuple_callee(x
, *p
);
125 // CHECK-AAPCS: declare void @argpass_last_z_tuple_callee(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>)
126 // CHECK-DARWIN: declare void @argpass_last_z_tuple_callee(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, ptr noundef)
129 // Boundary case of using the last available P-reg, PST expanded.
137 void test_argpass_last_p(PST
*p
) {
138 void argpass_last_p_callee(svbool_t
, svcount_t
, PST
);
139 argpass_last_p_callee(svpfalse(), svpfalse_c(), *p
);
141 // CHECK-AAPCS: declare void @argpass_last_p_callee(<vscale x 16 x i1>, target("aarch64.svcount"), <vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>)
142 // CHECK-DARWIN: declare void @argpass_last_p_callee(<vscale x 16 x i1>, target("aarch64.svcount"), ptr noundef)
145 // Not enough Z-regs, push PST to memory and pass a pointer, Z-regs and
146 // P-regs still available for other arguments
152 // *p -> memory, address -> x1
156 void test_argpass_no_z(PST
*p
, double dummy
, svmfloat8_t u
, int8x16_t v
, mfloat8x16_t w
) {
157 void argpass_no_z_callee(svmfloat8_t
, int8x16_t
, mfloat8x16_t
, double, double, int, PST
, int, double, svbool_t
);
158 argpass_no_z_callee(u
, v
, w
, .0, .0, 1, *p
, 2, 3.0, svptrue_b64());
160 // CHECK: declare void @argpass_no_z_callee(<vscale x 16 x i8>, <16 x i8> noundef, <16 x i8>, double noundef, double noundef, i32 noundef, ptr noundef, i32 noundef, double noundef, <vscale x 16 x i1>)
163 // Like the above, using a tuple to occupy some registers.
167 // *p -> memory, address -> x1
171 void test_argpass_no_z_tuple_f64(PST
*p
, float dummy
, svfloat64x4_t x
) {
172 void argpass_no_z_tuple_f64_callee(svfloat64x4_t
, double, int, PST
, int,
174 argpass_no_z_tuple_f64_callee(x
, .0, 1, *p
, 2, 3.0, svptrue_b64());
176 // CHECK: declare void @argpass_no_z_tuple_f64_callee(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, double noundef, i32 noundef, ptr noundef, i32 noundef, double noundef, <vscale x 16 x i1>)
179 // Likewise, using a different tuple.
183 // *p -> memory, address -> x1
187 void test_argpass_no_z_tuple_mfp8(PST
*p
, float dummy
, svmfloat8x4_t x
) {
188 void argpass_no_z_tuple_mfp8_callee(svmfloat8x4_t
, double, int, PST
, int,
190 argpass_no_z_tuple_mfp8_callee(x
, .0, 1, *p
, 2, 3.0, svptrue_b64());
192 // CHECK: declare void @argpass_no_z_tuple_mfp8_callee(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, double noundef, i32 noundef, ptr noundef, i32 noundef, double noundef, <vscale x 16 x i1>)
195 // Not enough Z-regs (consumed by a HFA), PST passed indirectly
199 // *p -> memory, address -> x1
203 void test_argpass_no_z_hfa(HFA
*h
, PST
*p
) {
204 void argpass_no_z_hfa_callee(double, HFA
, int, PST
, int, svbool_t
);
205 argpass_no_z_hfa_callee(.0, *h
, 1, *p
, 2, svptrue_b64());
207 // CHECK-AAPCS: declare void @argpass_no_z_hfa_callee(double noundef, [4 x float] alignstack(8), i32 noundef, ptr noundef, i32 noundef, <vscale x 16 x i1>)
208 // CHECK-DARWIN: declare void @argpass_no_z_hfa_callee(double noundef, [4 x float], i32 noundef, ptr noundef, i32 noundef, <vscale x 16 x i1>)
210 // Not enough Z-regs (consumed by a HVA), PST passed indirectly
214 // *p -> memory, address -> x1
218 void test_argpass_no_z_hva(HVA
*h
, PST
*p
) {
219 void argpass_no_z_hva_callee(double, HVA
, int, PST
, int, svbool_t
);
220 argpass_no_z_hva_callee(.0, *h
, 1, *p
, 2, svptrue_b64());
222 // CHECK-AAPCS: declare void @argpass_no_z_hva_callee(double noundef, [4 x <16 x i8>] alignstack(16), i32 noundef, ptr noundef, i32 noundef, <vscale x 16 x i1>)
223 // CHECK-DARWIN: declare void @argpass_no_z_hva_callee(double noundef, [4 x <16 x i8>], i32 noundef, ptr noundef, i32 noundef, <vscale x 16 x i1>)
225 // Not enough P-regs, PST passed indirectly, Z-regs and P-regs still available.
228 // *p -> memory, address -> x1
232 void test_argpass_no_p(PST
*p
) {
233 void argpass_no_p_callee(svbool_t
, svbool_t
, svbool_t
, int, PST
, int, double, svbool_t
);
234 argpass_no_p_callee(svptrue_b8(), svptrue_b16(), svptrue_b32(), 1, *p
, 2, 3.0, svptrue_b64());
236 // CHECK: declare void @argpass_no_p_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32 noundef, ptr noundef, i32 noundef, double noundef, <vscale x 16 x i1>)
239 // Like above, using a tuple to occupy some registers.
240 // P-regs still available.
244 // *p -> memory, address -> x1
248 void test_argpass_no_p_tuple(PST
*p
, svbool_t u
, svboolx2_t v
) {
249 void argpass_no_p_tuple_callee(svboolx2_t
, svbool_t
, int, PST
, int, double,
251 argpass_no_p_tuple_callee(v
, u
, 1, *p
, 2, 3.0, svptrue_b64());
253 // CHECK: declare void @argpass_no_p_tuple_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32 noundef, ptr noundef, i32 noundef, double noundef, <vscale x 16 x i1>)
256 // HFAs go back-to-back to memory, afterwards Z-regs not available, PST passed indirectly.
259 // *p -> memory, address -> x0
262 void test_after_hfa(HFA
*h
, PST
*p
) {
263 void after_hfa_callee(double, double, double, double, double, HFA
, PST
, HFA
, svbool_t
);
264 after_hfa_callee(.0, .0, .0, .0, .0, *h
, *p
, *h
, svpfalse());
266 // CHECK-AAPCS: declare void @after_hfa_callee(double noundef, double noundef, double noundef, double noundef, double noundef, [4 x float] alignstack(8), ptr noundef, [4 x float] alignstack(8), <vscale x 16 x i1>)
267 // CHECK-DARWIN: declare void @after_hfa_callee(double noundef, double noundef, double noundef, double noundef, double noundef, [4 x float], ptr noundef, [4 x float], <vscale x 16 x i1>)
269 // Small PST, not enough registers, passed indirectly, unlike other small
273 // *p -> memory, address -> x2
275 // 2.0 -> memory (next to the above)
276 void test_small_pst(SmallPST
*p
, SmallAgg
*s
) {
277 void small_pst_callee(SmallAgg
, double, double, double, double, double, double, double, double, double, SmallPST
, double);
278 small_pst_callee(*s
, .0, .0, .0, .0, .0, .0, .0, .0, 1.0, *p
, 2.0);
280 // CHECK-AAPCS: declare void @small_pst_callee([2 x i64], double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, ptr noundef, double noundef)
281 // CHECK-DARWIN: declare void @small_pst_callee([2 x i64], double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, i128, double noundef)
284 // Simple return, PST expanded to registers
291 PST
test_return(PST
*p
) {
294 // CHECK-AAPCS: define dso_local <{ <vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1> }> @test_return(ptr
295 // CHECK-DARWIN: define void @test_return(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.PST) align 16 initializes((0, 96)) %agg.result, ptr nocapture noundef readonly %p)
297 // Corner case of 1-element aggregate
299 SmallPST
test_return_small_pst(SmallPST
*p
) {
302 // CHECK-AAPCS: define dso_local <vscale x 4 x float> @test_return_small_pst(ptr
303 // CHECK-DARWIN: define i128 @test_return_small_pst(ptr nocapture noundef readonly %p)
306 // Big PST, returned indirectly
308 BigPST
test_return_big_pst(BigPST
*p
) {
311 // CHECK-AAPCS: define dso_local void @test_return_big_pst(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.BigPST) align 16 initializes((0, 176)) %agg.result, ptr nocapture noundef readonly %p)
312 // CHECK-DARWIN: define void @test_return_big_pst(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.BigPST) align 16 initializes((0, 176)) %agg.result, ptr nocapture noundef readonly %p)
314 // Variadic arguments are unnamed, PST passed indirectly.
315 // (Passing SVE types to a variadic function is currently unsupported by
316 // the AArch64 backend)
323 // *q -> memory, address -> x1
324 void test_pass_variadic(PST
*p
, PST
*q
) {
325 void pass_variadic_callee(PST
, ...);
326 pass_variadic_callee(*p
, *q
);
328 // CHECK-AAPCS: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp, ptr noundef nonnull align 16 dereferenceable(96) %q, i64 96, i1 false)
329 // CHECK-AAPCS: call void (<vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>, ...) @pass_variadic_callee(<vscale x 16 x i1> %1, <vscale x 2 x double> %cast.scalable1, <vscale x 4 x float> %cast.scalable2, <vscale x 4 x float> %cast.scalable3, <vscale x 16 x i8> %cast.scalable4, <vscale x 16 x i1> %12, ptr noundef nonnull %byval-temp)
331 // CHECK-DARWIN: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp, ptr noundef nonnull align 16 dereferenceable(96) %p, i64 96, i1 false)
332 // CHECK-DARWIN: call void @llvm.lifetime.start.p0(i64 96, ptr nonnull %byval-temp1)
333 // CHECK-DARWIN: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp1, ptr noundef nonnull align 16 dereferenceable(96) %q, i64 96, i1 false)
334 // CHECK-DARWIN: call void (ptr, ...) @pass_variadic_callee(ptr noundef nonnull %byval-temp, ptr noundef nonnull %byval-temp1)
337 // Test passing a small PST, still passed indirectly, despite being <= 128 bits
338 void test_small_pst_variadic(SmallPST
*p
) {
339 void small_pst_variadic_callee(int, ...);
340 small_pst_variadic_callee(0, *p
);
342 // CHECK-AAPCS: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(16) %byval-temp, ptr noundef nonnull align 16 dereferenceable(16) %p, i64 16, i1 false)
343 // CHECK-AAPCS: call void (i32, ...) @small_pst_variadic_callee(i32 noundef 0, ptr noundef nonnull %byval-temp)
345 // CHECK-DARWIN: %0 = load i128, ptr %p, align 16
346 // CHECK-DARWIN: tail call void (i32, ...) @small_pst_variadic_callee(i32 noundef 0, i128 %0)
348 // Test handling of a PST argument when passed in registers, from the callee side.
349 void test_argpass_callee_side(PST v
) {
353 // CHECK-AAPCS: define dso_local void @test_argpass_callee_side(<vscale x 16 x i1> %0, <vscale x 2 x double> %.coerce1, <vscale x 4 x float> %.coerce3, <vscale x 4 x float> %.coerce5, <vscale x 16 x i8> %.coerce7, <vscale x 16 x i1> %1)
354 // CHECK-AAPCS-NEXT: entry:
355 // CHECK-AAPCS-NEXT: %v = alloca %struct.PST, align 16
356 // CHECK-AAPCS-NEXT: %.coerce = bitcast <vscale x 16 x i1> %0 to <vscale x 2 x i8>
357 // CHECK-AAPCS-NEXT: %cast.fixed = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %.coerce, i64 0)
358 // CHECK-AAPCS-NEXT: store <2 x i8> %cast.fixed, ptr %v, align 16
359 // CHECK-AAPCS-NEXT: %2 = getelementptr inbounds nuw i8, ptr %v, i64 16
360 // CHECK-AAPCS-NEXT: %cast.fixed2 = tail call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> %.coerce1, i64 0)
361 // CHECK-AAPCS-NEXT: store <2 x double> %cast.fixed2, ptr %2, align 16
362 // CHECK-AAPCS-NEXT: %3 = getelementptr inbounds nuw i8, ptr %v, i64 32
363 // CHECK-AAPCS-NEXT: %cast.fixed4 = tail call <4 x float> @llvm.vector.extract.v4f32.nxv4f32(<vscale x 4 x float> %.coerce3, i64 0)
364 // CHECK-AAPCS-NEXT: store <4 x float> %cast.fixed4, ptr %3, align 16
365 // CHECK-AAPCS-NEXT: %4 = getelementptr inbounds nuw i8, ptr %v, i64 48
366 // CHECK-AAPCS-NEXT: %cast.fixed6 = tail call <4 x float> @llvm.vector.extract.v4f32.nxv4f32(<vscale x 4 x float> %.coerce5, i64 0)
367 // CHECK-AAPCS-NEXT: store <4 x float> %cast.fixed6, ptr %4, align 16
368 // CHECK-AAPCS-NEXT: %5 = getelementptr inbounds nuw i8, ptr %v, i64 64
369 // CHECK-AAPCS-NEXT: %cast.fixed8 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %.coerce7, i64 0)
370 // CHECK-AAPCS-NEXT: store <16 x i8> %cast.fixed8, ptr %5, align 16
371 // CHECK-AAPCS-NEXT: %6 = getelementptr inbounds nuw i8, ptr %v, i64 80
372 // CHECK-AAPCS-NEXT: %.coerce9 = bitcast <vscale x 16 x i1> %1 to <vscale x 2 x i8>
373 // CHECK-AAPCS-NEXT: %cast.fixed10 = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %.coerce9, i64 0)
374 // CHECK-AAPCS-NEXT: store <2 x i8> %cast.fixed10, ptr %6, align 16
375 // CHECK-AAPCS-NEXT: call void @use(ptr noundef nonnull %v)
376 // CHECK-AAPCS-NEXT: ret void
377 // CHECK-AAPCS-NEXT: }
379 // Test va_arg operation
383 void test_va_arg(int n
, ...) {
386 PST v
= va_arg(ap
, PST
);
389 void use1(bvec
, fvec32
);
392 // CHECK-AAPCS: define dso_local void @test_va_arg(i32 noundef %n, ...)
393 // CHECK-AAPCS-NEXT: entry:
394 // CHECK-AAPCS-NEXT: %ap = alloca %struct.__va_list, align 8
395 // CHECK-AAPCS-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %ap)
396 // CHECK-AAPCS-NEXT: call void @llvm.va_start.p0(ptr nonnull %ap)
397 // CHECK-AAPCS-NEXT: %gr_offs_p = getelementptr inbounds nuw i8, ptr %ap, i64 24
398 // CHECK-AAPCS-NEXT: %gr_offs = load i32, ptr %gr_offs_p, align 8
399 // CHECK-AAPCS-NEXT: %0 = icmp sgt i32 %gr_offs, -1
400 // CHECK-AAPCS-NEXT: br i1 %0, label %vaarg.on_stack, label %vaarg.maybe_reg
401 // CHECK-AAPCS-EMPTY:
402 // CHECK-AAPCS-NEXT: vaarg.maybe_reg: ; preds = %entry
404 // Increment by 8, size of the pointer to the argument value, not size of the argument value itself.
406 // CHECK-AAPCS-NEXT: %new_reg_offs = add nsw i32 %gr_offs, 8
407 // CHECK-AAPCS-NEXT: store i32 %new_reg_offs, ptr %gr_offs_p, align 8
408 // CHECK-AAPCS-NEXT: %inreg = icmp samesign ult i32 %gr_offs, -7
409 // CHECK-AAPCS-NEXT: br i1 %inreg, label %vaarg.in_reg, label %vaarg.on_stack
410 // CHECK-AAPCS-EMPTY:
411 // CHECK-AAPCS-NEXT: vaarg.in_reg: ; preds = %vaarg.maybe_reg
412 // CHECK-AAPCS-NEXT: %reg_top_p = getelementptr inbounds nuw i8, ptr %ap, i64 8
413 // CHECK-AAPCS-NEXT: %reg_top = load ptr, ptr %reg_top_p, align 8
414 // CHECK-AAPCS-NEXT: %1 = sext i32 %gr_offs to i64
415 // CHECK-AAPCS-NEXT: %2 = getelementptr inbounds i8, ptr %reg_top, i64 %1
416 // CHECK-AAPCS-NEXT: br label %vaarg.end
417 // CHECK-AAPCS-EMPTY:
418 // CHECK-AAPCS-NEXT: vaarg.on_stack: ; preds = %vaarg.maybe_reg, %entry
419 // CHECK-AAPCS-NEXT: %stack = load ptr, ptr %ap, align 8
420 // CHECK-AAPCS-NEXT: %new_stack = getelementptr inbounds nuw i8, ptr %stack, i64 8
421 // CHECK-AAPCS-NEXT: store ptr %new_stack, ptr %ap, align 8
422 // CHECK-AAPCS-NEXT: br label %vaarg.end
423 // CHECK-AAPCS-EMPTY:
424 // CHECK-AAPCS-NEXT: vaarg.end: ; preds = %vaarg.on_stack, %vaarg.in_reg
425 // CHECK-AAPCS-NEXT: %vaargs.addr = phi ptr [ %2, %vaarg.in_reg ], [ %stack, %vaarg.on_stack ]
427 // Extra indirection, for a composite passed indirectly.
428 // CHECK-AAPCS-NEXT: %vaarg.addr = load ptr, ptr %vaargs.addr, align 8
430 // CHECK-AAPCS-NEXT: %v.sroa.0.0.copyload = load <2 x i8>, ptr %vaarg.addr, align 16
431 // CHECK-AAPCS-NEXT: %v.sroa.43.0.vaarg.addr.sroa_idx = getelementptr inbounds nuw i8, ptr %vaarg.addr, i64 48
432 // CHECK-AAPCS-NEXT: %v.sroa.43.0.copyload = load <4 x float>, ptr %v.sroa.43.0.vaarg.addr.sroa_idx, align 16
433 // CHECK-AAPCS-NEXT: call void @llvm.va_end.p0(ptr nonnull %ap)
434 // CHECK-AAPCS-NEXT: %cast.scalable = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> poison, <2 x i8> %v.sroa.0.0.copyload, i64 0)
435 // CHECK-AAPCS-NEXT: %3 = bitcast <vscale x 2 x i8> %cast.scalable to <vscale x 16 x i1>
436 // CHECK-AAPCS-NEXT: %cast.scalable2 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> %v.sroa.43.0.copyload, i64 0)
437 // CHECK-AAPCS-NEXT: call void @use1(<vscale x 16 x i1> noundef %3, <vscale x 4 x float> noundef %cast.scalable2)
438 // CHECK-AAPCS-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %ap)
439 // CHECK-AAPCS-NEXT: ret void
440 // CHECK-AAPCS-NEXT: }
442 // CHECK-DARWIN: define void @test_va_arg(i32 noundef %n, ...)
443 // CHECK-DARWIN-NEXT: entry:
444 // CHECK-DARWIN-NEXT: %ap = alloca ptr, align 8
445 // CHECK-DARWIN-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ap)
446 // CHECK-DARWIN-NEXT: call void @llvm.va_start.p0(ptr nonnull %ap)
447 // CHECK-DARWIN-NEXT: %argp.cur = load ptr, ptr %ap, align 8
448 // CHECK-DARWIN-NEXT: %argp.next = getelementptr inbounds nuw i8, ptr %argp.cur, i64 8
449 // CHECK-DARWIN-NEXT: store ptr %argp.next, ptr %ap, align 8
450 // CHECK-DARWIN-NEXT: %0 = load ptr, ptr %argp.cur, align 8
451 // CHECK-DARWIN-NEXT: %v.sroa.0.0.copyload = load <2 x i8>, ptr %0, align 16
452 // CHECK-DARWIN-NEXT: %v.sroa.43.0..sroa_idx = getelementptr inbounds nuw i8, ptr %0, i64 48
453 // CHECK-DARWIN-NEXT: %v.sroa.43.0.copyload = load <4 x float>, ptr %v.sroa.43.0..sroa_idx, align 16
454 // CHECK-DARWIN-NEXT: call void @llvm.va_end.p0(ptr nonnull %ap)
455 // CHECK-DARWIN-NEXT: %cast.scalable = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> poison, <2 x i8> %v.sroa.0.0.copyload, i64 0)
456 // CHECK-DARWIN-NEXT: %1 = bitcast <vscale x 2 x i8> %cast.scalable to <vscale x 16 x i1>
457 // CHECK-DARWIN-NEXT: %cast.scalable2 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> %v.sroa.43.0.copyload, i64 0)
458 // CHECK-DARWIN-NEXT: call void @use1(<vscale x 16 x i1> noundef %1, <vscale x 4 x float> noundef %cast.scalable2)
459 // CHECK-DARWIN-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ap)
460 // CHECK-DARWIN-NEXT: ret void
461 // CHECK-DARWIN-NEXT: }
463 // Regression test for incorrect passing of SVE vector tuples
464 // The whole of `y` needs to be passed indirectly (AAPCS only; Darwin still passes it directly).
465 void test_tuple_reg_count(svfloat32_t x
, svfloat32x2_t y
) {
466 void test_tuple_reg_count_callee(svfloat32_t
, svfloat32_t
, svfloat32_t
, svfloat32_t
,
467 svfloat32_t
, svfloat32_t
, svfloat32_t
, svfloat32x2_t
);
468 test_tuple_reg_count_callee(x
, x
, x
, x
, x
, x
, x
, y
);
470 // CHECK-AAPCS: declare void @test_tuple_reg_count_callee(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, ptr noundef)
471 // CHECK-DARWIN: declare void @test_tuple_reg_count_callee(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
473 // Regression test for incorrect passing of SVE vector tuples
474 // The whole of `y` needs to be passed indirectly (AAPCS only; Darwin still passes it directly).
475 void test_tuple_reg_count_bool(svboolx4_t x
, svboolx4_t y
) {
476 void test_tuple_reg_count_bool_callee(svboolx4_t
, svboolx4_t
);
477 test_tuple_reg_count_bool_callee(x
, y
);
479 // CHECK-AAPCS: declare void @test_tuple_reg_count_bool_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, ptr noundef)
480 // CHECK-DARWIN: declare void @test_tuple_reg_count_bool_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)