1 // RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -target-abi darwinpcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
2 // RUN: %clang_cc1 -triple aarch64_be-none-linux-gnu -target-feature +neon -target-abi darwinpcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BE
4 // REQUIRES: aarch64-registered-target || arm-registered-target
6 // CHECK: define{{.*}} signext i8 @f0()
11 // Struct as return type. Aggregates <= 16 bytes are passed directly. For BE,
12 // return values are rounded up to 64 bits.
14 // CHECK-LE: define{{.*}} i8 @f1()
15 // CHECK-BE: define{{.*}} i64 @f1()
// Aggregate holding a single char. The expectations directly above name @f1
// with an i8 return on LE and an i64 return on BE; the definition of f1 is not
// visible in this excerpt.
16 struct s1
{ char f0
; };
19 // CHECK-LE: define{{.*}} i16 @f2()
20 // CHECK-BE: define{{.*}} i64 @f2()
// Aggregate holding a single short. The expectations directly above name @f2
// with an i16 return on LE and an i64 return on BE; f2 itself is not visible
// in this excerpt.
21 struct s2
{ short f0
; };
24 // CHECK-LE: define{{.*}} i32 @f3()
25 // CHECK-BE: define{{.*}} i64 @f3()
// Aggregate holding a single int. The expectations directly above name @f3
// with an i32 return on LE and an i64 return on BE; f3 itself is not visible
// in this excerpt.
26 struct s3
{ int f0
; };
29 // CHECK-LE: define{{.*}} i32 @f4()
30 // CHECK-BE: define{{.*}} i64 @f4()
// Aggregate whose only member is a nested struct (s4_0) that itself holds a
// single int, i.e. one level of wrapping around an int.
31 struct s4
{ struct s4_0
{ int f0
; } f0
; };
34 // CHECK-LE: define{{.*}} i32 @f5()
35 // CHECK-BE: define{{.*}} i64 @f5()
// Aggregate with an empty anonymous struct member (f0) followed by an int (f1).
36 struct s5
{ struct { } f0
; int f1
; };
39 // CHECK-LE: define{{.*}} i32 @f6()
40 // CHECK-BE: define{{.*}} i64 @f6()
// Aggregate whose only member is a one-element int array.
41 struct s6
{ int f0
[1]; };
44 // CHECK: define{{.*}} void @f7()
// Aggregate whose only member is an anonymous struct containing nothing but a
// zero-width bit-field. The expectation directly above names @f7 as returning
// void; s7 is also the by-value parameter type of f15.
45 struct s7
{ struct { int : 0; } f0
; };
48 // CHECK: define{{.*}} void @f8()
// Like s7, but the member is a one-element array of the anonymous struct that
// contains only a zero-width bit-field. Also used as a member of s19 and s20
// and as the by-value parameter type of f16.
49 struct s8
{ struct { int : 0; } f0
[1]; };
52 // CHECK-LE: define{{.*}} i32 @f9()
53 // CHECK-BE: define{{.*}} i64 @f9()
// Aggregate with one int followed by a trailing zero-width bit-field.
54 struct s9
{ int f0
; int : 0; };
57 // CHECK-LE: define{{.*}} i32 @f10()
58 // CHECK-BE: define{{.*}} i64 @f10()
// Aggregate with one int followed by two zero-width bit-fields.
59 struct s10
{ int f0
; int : 0; int : 0; };
// Returns struct s10 by value. The body is intentionally empty: only the
// emitted function signature is inspected by the expectations above.
60 struct s10
f10(void) {}
62 // CHECK-LE: define{{.*}} i32 @f11()
63 // CHECK-BE: define{{.*}} i64 @f11()
// Aggregate with a leading zero-width bit-field followed by one int.
64 struct s11
{ int : 0; int f0
; };
// Returns struct s11 by value; empty body, only the signature is inspected.
65 struct s11
f11(void) {}
67 // CHECK-LE: define{{.*}} i24 @f11_packed()
68 // CHECK-BE: define{{.*}} i64 @f11_packed()
// char + short aggregate carrying the packed attribute. The expectations
// directly above require an i24 return on LE and an i64 return on BE.
69 struct s11_packed
{ char c
; short s
} __attribute__((packed
));
// Returns struct s11_packed by value; empty body, only the signature is
// inspected.
70 struct s11_packed
f11_packed(void) { }
72 // CHECK-LE: define{{.*}} i32 @f11_not_packed()
73 // CHECK-BE: define{{.*}} i64 @f11_not_packed()
// Same members as s11_packed (char + short) but without the packed attribute.
// The expectations directly above require an i32 return on LE and an i64
// return on BE.
74 struct s11_not_packed
{ char c
; short s
; };
// Returns struct s11_not_packed by value; empty body, only the signature is
// inspected.
75 struct s11_not_packed
f11_not_packed(void) { }
77 // CHECK-LE: define{{.*}} i32 @f12()
78 // CHECK-BE: define{{.*}} i64 @f12()
// Union overlaying a char, a short and an int.
79 union u12
{ char f0
; short f1
; int f2
; };
// Returns union u12 by value; empty body, only the signature is inspected.
80 union u12
f12(void) {}
82 // Homogeneous Aggregate as return type will be passed directly.
83 // CHECK: define{{.*}} %struct.s13 @f13()
// Single-float aggregate. The expectation directly above requires it to be
// returned as %struct.s13.
84 struct s13
{ float f0
; };
// Returns struct s13 by value; empty body, only the signature is inspected.
85 struct s13
f13(void) {}
86 // CHECK: define{{.*}} %union.u14 @f14()
// Single-float union. The expectation directly above requires it to be
// returned as %union.u14.
87 union u14
{ float f0
; };
// Returns union u14 by value; empty body, only the signature is inspected.
88 union u14
f14(void) {}
90 // CHECK: define{{.*}} void @f15()
// Takes struct s7 by value and does nothing. The expectation directly above
// requires the emitted definition to have no parameters.
91 void f15(struct s7 a0
) {}
93 // CHECK: define{{.*}} void @f16()
// Takes struct s8 by value and does nothing. The expectation directly above
// requires the emitted definition to have no parameters.
94 void f16(struct s8 a0
) {}
96 // CHECK-LE: define{{.*}} i32 @f17()
97 // CHECK-BE: define{{.*}} i64 @f17()
// Aggregate made of two bit-fields: a 13-bit short and a 4-bit char.
98 struct s17
{ short f0
: 13; char f1
: 4; };
// Returns struct s17 by value; empty body, only the signature is inspected.
99 struct s17
f17(void) {}
101 // CHECK-LE: define{{.*}} i32 @f18()
102 // CHECK-BE: define{{.*}} i64 @f18()
// Aggregate with a full short followed by a 4-bit char bit-field.
103 struct s18
{ short f0
; char f1
: 4; };
// Returns struct s18 by value; empty body, only the signature is inspected.
104 struct s18
f18(void) {}
106 // CHECK-LE: define{{.*}} i32 @f19()
107 // CHECK-BE: define{{.*}} i64 @f19()
// Aggregate with an int followed by a struct s8 member.
108 struct s19
{ int f0
; struct s8 f1
; };
// Returns struct s19 by value; empty body, only the signature is inspected.
109 struct s19
f19(void) {}
111 // CHECK-LE: define{{.*}} i32 @f20()
112 // CHECK-BE: define{{.*}} i64 @f20()
// Same members as s19 in the opposite order: struct s8 first, then an int.
113 struct s20
{ struct s8 f1
; int f0
; };
// Returns struct s20 by value; empty body, only the signature is inspected.
114 struct s20
f20(void) {}
116 // CHECK-LE: define{{.*}} i32 @f21()
117 // CHECK-BE: define{{.*}} i64 @f21()
// Aggregate with an empty anonymous struct member followed by a 4-bit int
// bit-field.
118 struct s21
{ struct {} f1
; int f0
: 4; };
// Returns struct s21 by value; empty body, only the signature is inspected.
119 struct s21
f21(void) {}
121 // CHECK-LE: define{{.*}} i16 @f22()
122 // CHECK-LE: define{{.*}} i32 @f23()
123 // CHECK-BE: define{{.*}} i64 @f22()
124 // CHECK-BE: define{{.*}} i64 @f23()
125 // CHECK: define{{.*}} i64 @f24()
126 // CHECK: define{{.*}} [2 x i64] @f25()
127 // CHECK: define{{.*}} { float, float } @f26()
128 // CHECK: define{{.*}} { double, double } @f27()
// Six empty functions returning _Complex values of increasing element type
// (char, short, int, long long, float, double). Only their emitted return
// types are inspected by the run of expectations directly above.
129 _Complex
char f22(void) {}
130 _Complex
short f23(void) {}
131 _Complex
int f24(void) {}
132 _Complex
long long f25(void) {}
133 _Complex
float f26(void) {}
134 _Complex
double f27(void) {}
136 // CHECK-LE: define{{.*}} i16 @f28()
137 // CHECK-BE: define{{.*}} i64 @f28()
// Aggregate wrapping a single _Complex char. The expectations directly above
// name @f28 (i16 on LE, i64 on BE); f28 itself is not visible in this excerpt.
138 struct s28
{ _Complex
char f0
; };
141 // CHECK-LE: define{{.*}} i32 @f29()
142 // CHECK-BE: define{{.*}} i64 @f29()
// Aggregate wrapping a single _Complex short. The expectations directly above
// name @f29 (i32 on LE, i64 on BE); f29 itself is not visible in this excerpt.
143 struct s29
{ _Complex
short f0
; };
146 // CHECK: define{{.*}} i64 @f30()
// Aggregate wrapping a single _Complex int. The expectation directly above
// names @f30 with an i64 return; f30 itself is not visible in this excerpt.
147 struct s30
{ _Complex
int f0
; };
// Single-char aggregate used as a by-value parameter.
150 struct s31
{ char x
; };
// Takes struct s31 by value and does nothing. The expectations that follow
// require the argument to arrive as an i64 (%s.coerce) and be narrowed to an
// i8 before being stored, with an extra right shift by 56 on BE.
151 void f31(struct s31 s
) { }
152 // CHECK: define{{.*}} void @f31(i64 %s.coerce)
153 // CHECK: %s = alloca %struct.s31, align 1
154 // CHECK-BE: %coerce.highbits = lshr i64 %s.coerce, 56
155 // CHECK-BE: trunc i64 %coerce.highbits to i8
156 // CHECK-LE: trunc i64 %s.coerce to i8
157 // CHECK: store i8 %{{.*}},
// Single-double aggregate used as a by-value parameter.
159 struct s32
{ double x
; };
// Takes struct s32 by value and does nothing. The expectation that follows
// requires the parameter to be emitted as [1 x double].
160 void f32(struct s32 s
) { }
161 // CHECK: @f32([1 x double] %{{.*}})
163 // A composite type larger than 16 bytes should be passed indirectly.
// Aggregate holding a 32*32 (1024) element char buffer, i.e. far larger than
// the 16-byte limit mentioned in the comment above.
164 struct s33
{ char buf
[32*32]; };
// Takes struct s33 by value and does nothing. The expectation that follows
// requires the parameter to be emitted as a pointer.
165 void f33(struct s33 s
) { }
166 // CHECK: define{{.*}} void @f33(ptr noundef %s)
// Single-char aggregate used to exercise the caller side of by-value passing.
168 struct s34
{ char c
; };
// Declared only; it serves as the callee for g34.
169 void f34(struct s34 s
);
// Dereferences s and passes the struct by value to f34. The expectations that
// follow require an i8 load, a zero-extension to i64, and a call taking i64.
170 void g34(struct s34
*s
) { f34(*s
); }
171 // CHECK: @g34(ptr noundef %s)
172 // CHECK: %[[a:.*]] = load i8, ptr %{{.*}}
173 // CHECK: zext i8 %[[a]] to i64
174 // CHECK: call void @f34(i64 %{{.*}})
177 * Check that va_arg accesses stack according to ABI alignment
179 long long t1(int i
, ...) {
181 __builtin_va_list ap
;
182 __builtin_va_start(ap
, i
);
183 // CHECK-NOT: add i32 %{{.*}} 7
184 // CHECK-NOT: and i32 %{{.*}} -8
185 long long ll
= __builtin_va_arg(ap
, long long);
186 __builtin_va_end(ap
);
189 double t2(int i
, ...) {
191 __builtin_va_list ap
;
192 __builtin_va_start(ap
, i
);
193 // CHECK-NOT: add i32 %{{.*}} 7
194 // CHECK-NOT: and i32 %{{.*}} -8
195 double ll
= __builtin_va_arg(ap
, double);
196 __builtin_va_end(ap
);
199 _Bool
t3(int i
, ...) {
201 __builtin_va_list ap
;
202 __builtin_va_start(ap
, i
);
203 // CHECK: %0 = va_arg ptr %ap, i8
204 // CHECK-NEXT: store i8 %0, ptr %varet, align 1
205 _Bool b
= __builtin_va_arg(ap
, _Bool
);
206 __builtin_va_end(ap
);
210 #include <arm_neon.h>
212 // Homogeneous Vector Aggregate as return type and argument type.
213 // CHECK: define{{.*}} %struct.int8x16x2_t @f0_0(<16 x i8> noundef %{{.*}}, <16 x i8> noundef %{{.*}})
214 int8x16x2_t
f0_0(int8x16_t a0
, int8x16_t a1
) {
215 return vzipq_s8(a0
, a1
);
218 // Test direct vector passing.
// Generic float vector types of 8, 16, 32 and 64 bytes (vector_size is given
// in bytes). The expectations below show them as <2 x float>, <4 x float>,
// <8 x float> and <16 x float> respectively.
219 typedef float T_float32x2
__attribute__ ((__vector_size__ (8)));
220 typedef float T_float32x4
__attribute__ ((__vector_size__ (16)));
221 typedef float T_float32x8
__attribute__ ((__vector_size__ (32)));
222 typedef float T_float32x16
__attribute__ ((__vector_size__ (64)));
224 // CHECK: define{{.*}} <2 x float> @f1_0(<2 x float> noundef %{{.*}})
// Identity function on an 8-byte float vector. The expectation directly above
// requires both the parameter and the return to be a direct <2 x float>.
225 T_float32x2
f1_0(T_float32x2 a0
) { return a0
; }
226 // CHECK: define{{.*}} <4 x float> @f1_1(<4 x float> noundef %{{.*}})
// Identity function on a 16-byte float vector. The expectation directly above
// requires both the parameter and the return to be a direct <4 x float>.
227 T_float32x4
f1_1(T_float32x4 a0
) { return a0
; }
228 // Vectors larger than 16 bytes are illegal and are passed indirectly.
229 // CHECK: define{{.*}} void @f1_2(ptr noalias sret(<8 x float>) align 16 %{{.*}}, ptr noundef %0)
// Identity function on a 32-byte float vector. The expectation directly above
// requires the result to come back through an sret pointer and the argument to
// arrive as a pointer.
230 T_float32x8
f1_2(T_float32x8 a0
) { return a0
; }
231 // CHECK: define{{.*}} void @f1_3(ptr noalias sret(<16 x float>) align 16 %{{.*}}, ptr noundef %0)
// Identity function on a 64-byte float vector. The expectation directly above
// requires the result to come back through an sret pointer and the argument to
// arrive as a pointer.
232 T_float32x16
f1_3(T_float32x16 a0
) { return a0
; }
234 // Testing alignment with aggregates: HFA, aggregates with size <= 16 bytes and
235 // aggregates with size > 16 bytes.
238 float v
[4]; //Testing HFA.
239 } __attribute__((aligned(16)));
240 typedef struct s35 s35_with_align
;
242 typedef __attribute__((neon_vector_type(4))) float float32x4_t
;
243 float32x4_t
f35(int i
, s35_with_align s1
, s35_with_align s2
) {
244 // CHECK: define{{.*}} <4 x float> @f35(i32 noundef %i, [4 x float] %s1.coerce, [4 x float] %s2.coerce)
245 // CHECK: %s1 = alloca %struct.s35, align 16
246 // CHECK: %s2 = alloca %struct.s35, align 16
247 // CHECK: load <4 x float>, ptr %s1, align 16
248 // CHECK: load <4 x float>, ptr %s2, align 16
249 float32x4_t v
= vaddq_f32(*(float32x4_t
*)&s1
,
250 *(float32x4_t
*)&s2
);
256 int v
[4]; //Testing 16-byte aggregate.
257 } __attribute__((aligned(16)));
258 typedef struct s36 s36_with_align
;
260 typedef __attribute__((neon_vector_type(4))) int int32x4_t
;
261 int32x4_t
f36(int i
, s36_with_align s1
, s36_with_align s2
) {
262 // CHECK: define{{.*}} <4 x i32> @f36(i32 noundef %i, i128 %s1.coerce, i128 %s2.coerce)
263 // CHECK: %s1 = alloca %struct.s36, align 16
264 // CHECK: %s2 = alloca %struct.s36, align 16
265 // CHECK: store i128 %s1.coerce, ptr %{{.*}}, align 16
266 // CHECK: store i128 %s2.coerce, ptr %{{.*}}, align 16
267 // CHECK: load <4 x i32>, ptr %s1, align 16
268 // CHECK: load <4 x i32>, ptr %s2, align 16
269 int32x4_t v
= vaddq_s32(*(int32x4_t
*)&s1
,
276 int v
[18]; //Testing large aggregate.
277 } __attribute__((aligned(16)));
278 typedef struct s37 s37_with_align
;
280 int32x4_t
f37(int i
, s37_with_align s1
, s37_with_align s2
) {
281 // CHECK: define{{.*}} <4 x i32> @f37(i32 noundef %i, ptr noundef %s1, ptr noundef %s2)
282 // CHECK: load <4 x i32>, ptr %s1, align 16
283 // CHECK: load <4 x i32>, ptr %s2, align 16
284 int32x4_t v
= vaddq_s32(*(int32x4_t
*)&s1
,
289 int32x4_t
caller37() {
291 // CHECK: %[[a:.*]] = alloca %struct.s37, align 16
292 // CHECK: %[[b:.*]] = alloca %struct.s37, align 16
293 // CHECK: call void @llvm.memcpy
294 // CHECK: call void @llvm.memcpy
295 // CHECK: call <4 x i32> @f37(i32 noundef 3, ptr noundef %[[a]], ptr noundef %[[b]])
296 return f37(3, g37
, g37
);
299 // Test passing structs with size < 8, < 16 and > 16
300 // with alignment of 16 and without
302 // structs with size <= 8 bytes, without alignment attribute
303 // passed as i64 regardless of the align attribute
309 typedef struct s38 s38_no_align
;
310 // passing structs in registers
311 __attribute__ ((noinline
))
312 int f38(int i
, s38_no_align s1
, s38_no_align s2
) {
313 // CHECK: define{{.*}} i32 @f38(i32 noundef %i, i64 %s1.coerce, i64 %s2.coerce)
314 // CHECK: %s1 = alloca %struct.s38, align 4
315 // CHECK: %s2 = alloca %struct.s38, align 4
316 // CHECK: store i64 %s1.coerce, ptr %{{.*}}, align 4
317 // CHECK: store i64 %s2.coerce, ptr %{{.*}}, align 4
318 // CHECK: getelementptr inbounds %struct.s38, ptr %s1, i32 0, i32 0
319 // CHECK: getelementptr inbounds %struct.s38, ptr %s2, i32 0, i32 0
320 // CHECK: getelementptr inbounds %struct.s38, ptr %s1, i32 0, i32 1
321 // CHECK: getelementptr inbounds %struct.s38, ptr %s2, i32 0, i32 1
322 return s1
.i
+ s2
.i
+ i
+ s1
.s
+ s2
.s
;
327 // CHECK: define{{.*}} i32 @caller38()
328 // CHECK: %[[a:.*]] = load i64, ptr @g38, align 4
329 // CHECK: %[[b:.*]] = load i64, ptr @g38_2, align 4
330 // CHECK: call i32 @f38(i32 noundef 3, i64 %[[a]], i64 %[[b]])
331 return f38(3, g38
, g38_2
);
333 // passing structs on stack
334 __attribute__ ((noinline
))
335 int f38_stack(int i
, int i2
, int i3
, int i4
, int i5
, int i6
, int i7
, int i8
,
336 int i9
, s38_no_align s1
, s38_no_align s2
) {
337 // CHECK: define{{.*}} i32 @f38_stack(i32 noundef %i, i32 noundef %i2, i32 noundef %i3, i32 noundef %i4, i32 noundef %i5, i32 noundef %i6, i32 noundef %i7, i32 noundef %i8, i32 noundef %i9, i64 %s1.coerce, i64 %s2.coerce)
338 // CHECK: %s1 = alloca %struct.s38, align 4
339 // CHECK: %s2 = alloca %struct.s38, align 4
340 // CHECK: store i64 %s1.coerce, ptr %{{.*}}, align 4
341 // CHECK: store i64 %s2.coerce, ptr %{{.*}}, align 4
342 // CHECK: getelementptr inbounds %struct.s38, ptr %s1, i32 0, i32 0
343 // CHECK: getelementptr inbounds %struct.s38, ptr %s2, i32 0, i32 0
344 // CHECK: getelementptr inbounds %struct.s38, ptr %s1, i32 0, i32 1
345 // CHECK: getelementptr inbounds %struct.s38, ptr %s2, i32 0, i32 1
346 return s1
.i
+ s2
.i
+ i
+ i2
+ i3
+ i4
+ i5
+ i6
+ i7
+ i8
+ i9
+ s1
.s
+ s2
.s
;
348 int caller38_stack() {
349 // CHECK: define{{.*}} i32 @caller38_stack()
350 // CHECK: %[[a:.*]] = load i64, ptr @g38, align 4
351 // CHECK: %[[b:.*]] = load i64, ptr @g38_2, align 4
352 // CHECK: call i32 @f38_stack(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6, i32 noundef 7, i32 noundef 8, i32 noundef 9, i64 %[[a]], i64 %[[b]])
353 return f38_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g38
, g38_2
);
356 // structs with size <= 8 bytes, with alignment attribute
361 } __attribute__((aligned(16)));
362 typedef struct s39 s39_with_align
;
363 // passing aligned structs in registers
364 __attribute__ ((noinline
))
365 int f39(int i
, s39_with_align s1
, s39_with_align s2
) {
366 // CHECK: define{{.*}} i32 @f39(i32 noundef %i, i128 %s1.coerce, i128 %s2.coerce)
367 // CHECK: %s1 = alloca %struct.s39, align 16
368 // CHECK: %s2 = alloca %struct.s39, align 16
369 // CHECK: store i128 %s1.coerce, ptr %{{.*}}, align 16
370 // CHECK: store i128 %s2.coerce, ptr %{{.*}}, align 16
371 // CHECK: getelementptr inbounds %struct.s39, ptr %s1, i32 0, i32 0
372 // CHECK: getelementptr inbounds %struct.s39, ptr %s2, i32 0, i32 0
373 // CHECK: getelementptr inbounds %struct.s39, ptr %s1, i32 0, i32 1
374 // CHECK: getelementptr inbounds %struct.s39, ptr %s2, i32 0, i32 1
375 return s1
.i
+ s2
.i
+ i
+ s1
.s
+ s2
.s
;
378 s39_with_align g39_2
;
380 // CHECK: define{{.*}} i32 @caller39()
381 // CHECK: %[[a:.*]] = load i128, ptr @g39, align 16
382 // CHECK: %[[b:.*]] = load i128, ptr @g39_2, align 16
383 // CHECK: call i32 @f39(i32 noundef 3, i128 %[[a]], i128 %[[b]])
384 return f39(3, g39
, g39_2
);
386 // passing aligned structs on stack
387 __attribute__ ((noinline
))
388 int f39_stack(int i
, int i2
, int i3
, int i4
, int i5
, int i6
, int i7
, int i8
,
389 int i9
, s39_with_align s1
, s39_with_align s2
) {
390 // CHECK: define{{.*}} i32 @f39_stack(i32 noundef %i, i32 noundef %i2, i32 noundef %i3, i32 noundef %i4, i32 noundef %i5, i32 noundef %i6, i32 noundef %i7, i32 noundef %i8, i32 noundef %i9, i128 %s1.coerce, i128 %s2.coerce)
391 // CHECK: %s1 = alloca %struct.s39, align 16
392 // CHECK: %s2 = alloca %struct.s39, align 16
393 // CHECK: store i128 %s1.coerce, ptr %{{.*}}, align 16
394 // CHECK: store i128 %s2.coerce, ptr %{{.*}}, align 16
395 // CHECK: getelementptr inbounds %struct.s39, ptr %s1, i32 0, i32 0
396 // CHECK: getelementptr inbounds %struct.s39, ptr %s2, i32 0, i32 0
397 // CHECK: getelementptr inbounds %struct.s39, ptr %s1, i32 0, i32 1
398 // CHECK: getelementptr inbounds %struct.s39, ptr %s2, i32 0, i32 1
399 return s1
.i
+ s2
.i
+ i
+ i2
+ i3
+ i4
+ i5
+ i6
+ i7
+ i8
+ i9
+ s1
.s
+ s2
.s
;
401 int caller39_stack() {
402 // CHECK: define{{.*}} i32 @caller39_stack()
403 // CHECK: %[[a:.*]] = load i128, ptr @g39, align 16
404 // CHECK: %[[b:.*]] = load i128, ptr @g39_2, align 16
405 // CHECK: call i32 @f39_stack(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6, i32 noundef 7, i32 noundef 8, i32 noundef 9, i128 %[[a]], i128 %[[b]])
406 return f39_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g39
, g39_2
);
409 // structs with size <= 16 bytes, without alignment attribute
417 typedef struct s40 s40_no_align
;
418 // passing structs in registers
419 __attribute__ ((noinline
))
420 int f40(int i
, s40_no_align s1
, s40_no_align s2
) {
421 // CHECK: define{{.*}} i32 @f40(i32 noundef %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
422 // CHECK: %s1 = alloca %struct.s40, align 4
423 // CHECK: %s2 = alloca %struct.s40, align 4
424 // CHECK: store [2 x i64] %s1.coerce, ptr %{{.*}}, align 4
425 // CHECK: store [2 x i64] %s2.coerce, ptr %{{.*}}, align 4
426 // CHECK: getelementptr inbounds %struct.s40, ptr %s1, i32 0, i32 0
427 // CHECK: getelementptr inbounds %struct.s40, ptr %s2, i32 0, i32 0
428 // CHECK: getelementptr inbounds %struct.s40, ptr %s1, i32 0, i32 1
429 // CHECK: getelementptr inbounds %struct.s40, ptr %s2, i32 0, i32 1
430 return s1
.i
+ s2
.i
+ i
+ s1
.s
+ s2
.s
;
435 // CHECK: define{{.*}} i32 @caller40()
436 // CHECK: %[[a:.*]] = load [2 x i64], ptr @g40, align 4
437 // CHECK: %[[b:.*]] = load [2 x i64], ptr @g40_2, align 4
438 // CHECK: call i32 @f40(i32 noundef 3, [2 x i64] %[[a]], [2 x i64] %[[b]])
439 return f40(3, g40
, g40_2
);
441 // passing structs on stack
442 __attribute__ ((noinline
))
443 int f40_stack(int i
, int i2
, int i3
, int i4
, int i5
, int i6
, int i7
, int i8
,
444 int i9
, s40_no_align s1
, s40_no_align s2
) {
445 // CHECK: define{{.*}} i32 @f40_stack(i32 noundef %i, i32 noundef %i2, i32 noundef %i3, i32 noundef %i4, i32 noundef %i5, i32 noundef %i6, i32 noundef %i7, i32 noundef %i8, i32 noundef %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
446 // CHECK: %s1 = alloca %struct.s40, align 4
447 // CHECK: %s2 = alloca %struct.s40, align 4
448 // CHECK: store [2 x i64] %s1.coerce, ptr %{{.*}}, align 4
449 // CHECK: store [2 x i64] %s2.coerce, ptr %{{.*}}, align 4
450 // CHECK: getelementptr inbounds %struct.s40, ptr %s1, i32 0, i32 0
451 // CHECK: getelementptr inbounds %struct.s40, ptr %s2, i32 0, i32 0
452 // CHECK: getelementptr inbounds %struct.s40, ptr %s1, i32 0, i32 1
453 // CHECK: getelementptr inbounds %struct.s40, ptr %s2, i32 0, i32 1
454 return s1
.i
+ s2
.i
+ i
+ i2
+ i3
+ i4
+ i5
+ i6
+ i7
+ i8
+ i9
+ s1
.s
+ s2
.s
;
456 int caller40_stack() {
457 // CHECK: define{{.*}} i32 @caller40_stack()
458 // CHECK: %[[a:.*]] = load [2 x i64], ptr @g40, align 4
459 // CHECK: %[[b:.*]] = load [2 x i64], ptr @g40_2, align 4
460 // CHECK: call i32 @f40_stack(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6, i32 noundef 7, i32 noundef 8, i32 noundef 9, [2 x i64] %[[a]], [2 x i64] %[[b]])
461 return f40_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g40
, g40_2
);
464 // structs with size <= 16 bytes, with alignment attribute
471 } __attribute__((aligned(16)));
472 typedef struct s41 s41_with_align
;
473 // passing aligned structs in registers
474 __attribute__ ((noinline
))
475 int f41(int i
, s41_with_align s1
, s41_with_align s2
) {
476 // CHECK: define{{.*}} i32 @f41(i32 noundef %i, i128 %s1.coerce, i128 %s2.coerce)
477 // CHECK: %s1 = alloca %struct.s41, align 16
478 // CHECK: %s2 = alloca %struct.s41, align 16
479 // CHECK: store i128 %s1.coerce, ptr %{{.*}}, align 16
480 // CHECK: store i128 %s2.coerce, ptr %{{.*}}, align 16
481 // CHECK: getelementptr inbounds %struct.s41, ptr %s1, i32 0, i32 0
482 // CHECK: getelementptr inbounds %struct.s41, ptr %s2, i32 0, i32 0
483 // CHECK: getelementptr inbounds %struct.s41, ptr %s1, i32 0, i32 1
484 // CHECK: getelementptr inbounds %struct.s41, ptr %s2, i32 0, i32 1
485 return s1
.i
+ s2
.i
+ i
+ s1
.s
+ s2
.s
;
488 s41_with_align g41_2
;
490 // CHECK: define{{.*}} i32 @caller41()
491 // CHECK: %[[a:.*]] = load i128, ptr @g41, align 16
492 // CHECK: %[[b:.*]] = load i128, ptr @g41_2, align 16
493 // CHECK: call i32 @f41(i32 noundef 3, i128 %[[a]], i128 %[[b]])
494 return f41(3, g41
, g41_2
);
496 // passing aligned structs on stack
497 __attribute__ ((noinline
))
498 int f41_stack(int i
, int i2
, int i3
, int i4
, int i5
, int i6
, int i7
, int i8
,
499 int i9
, s41_with_align s1
, s41_with_align s2
) {
500 // CHECK: define{{.*}} i32 @f41_stack(i32 noundef %i, i32 noundef %i2, i32 noundef %i3, i32 noundef %i4, i32 noundef %i5, i32 noundef %i6, i32 noundef %i7, i32 noundef %i8, i32 noundef %i9, i128 %s1.coerce, i128 %s2.coerce)
501 // CHECK: %s1 = alloca %struct.s41, align 16
502 // CHECK: %s2 = alloca %struct.s41, align 16
503 // CHECK: store i128 %s1.coerce, ptr %{{.*}}, align 16
504 // CHECK: store i128 %s2.coerce, ptr %{{.*}}, align 16
505 // CHECK: getelementptr inbounds %struct.s41, ptr %s1, i32 0, i32 0
506 // CHECK: getelementptr inbounds %struct.s41, ptr %s2, i32 0, i32 0
507 // CHECK: getelementptr inbounds %struct.s41, ptr %s1, i32 0, i32 1
508 // CHECK: getelementptr inbounds %struct.s41, ptr %s2, i32 0, i32 1
509 return s1
.i
+ s2
.i
+ i
+ i2
+ i3
+ i4
+ i5
+ i6
+ i7
+ i8
+ i9
+ s1
.s
+ s2
.s
;
511 int caller41_stack() {
512 // CHECK: define{{.*}} i32 @caller41_stack()
513 // CHECK: %[[a:.*]] = load i128, ptr @g41, align 16
514 // CHECK: %[[b:.*]] = load i128, ptr @g41_2, align 16
515 // CHECK: call i32 @f41_stack(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6, i32 noundef 7, i32 noundef 8, i32 noundef 9, i128 %[[a]], i128 %[[b]])
516 return f41_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g41
, g41_2
);
519 // structs with size > 16 bytes, without alignment attribute
529 typedef struct s42 s42_no_align
;
530 // passing structs in registers
531 __attribute__ ((noinline
))
532 int f42(int i
, s42_no_align s1
, s42_no_align s2
) {
533 // CHECK: define{{.*}} i32 @f42(i32 noundef %i, ptr noundef %s1, ptr noundef %s2)
534 // CHECK: getelementptr inbounds %struct.s42, ptr %s1, i32 0, i32 0
535 // CHECK: getelementptr inbounds %struct.s42, ptr %s2, i32 0, i32 0
536 // CHECK: getelementptr inbounds %struct.s42, ptr %s1, i32 0, i32 1
537 // CHECK: getelementptr inbounds %struct.s42, ptr %s2, i32 0, i32 1
538 return s1
.i
+ s2
.i
+ i
+ s1
.s
+ s2
.s
;
543 // CHECK: define{{.*}} i32 @caller42()
544 // CHECK: %[[a:.*]] = alloca %struct.s42, align 4
545 // CHECK: %[[b:.*]] = alloca %struct.s42, align 4
546 // CHECK: call void @llvm.memcpy.p0.p0.i64
547 // CHECK: call void @llvm.memcpy.p0.p0.i64
548 // CHECK: call i32 @f42(i32 noundef 3, ptr noundef %[[a]], ptr noundef %[[b]])
549 return f42(3, g42
, g42_2
);
551 // passing structs on stack
552 __attribute__ ((noinline
))
553 int f42_stack(int i
, int i2
, int i3
, int i4
, int i5
, int i6
, int i7
, int i8
,
554 int i9
, s42_no_align s1
, s42_no_align s2
) {
555 // CHECK: define{{.*}} i32 @f42_stack(i32 noundef %i, i32 noundef %i2, i32 noundef %i3, i32 noundef %i4, i32 noundef %i5, i32 noundef %i6, i32 noundef %i7, i32 noundef %i8, i32 noundef %i9, ptr noundef %s1, ptr noundef %s2)
556 // CHECK: getelementptr inbounds %struct.s42, ptr %s1, i32 0, i32 0
557 // CHECK: getelementptr inbounds %struct.s42, ptr %s2, i32 0, i32 0
558 // CHECK: getelementptr inbounds %struct.s42, ptr %s1, i32 0, i32 1
559 // CHECK: getelementptr inbounds %struct.s42, ptr %s2, i32 0, i32 1
560 return s1
.i
+ s2
.i
+ i
+ i2
+ i3
+ i4
+ i5
+ i6
+ i7
+ i8
+ i9
+ s1
.s
+ s2
.s
;
562 int caller42_stack() {
563 // CHECK: define{{.*}} i32 @caller42_stack()
564 // CHECK: %[[a:.*]] = alloca %struct.s42, align 4
565 // CHECK: %[[b:.*]] = alloca %struct.s42, align 4
566 // CHECK: call void @llvm.memcpy.p0.p0.i64
567 // CHECK: call void @llvm.memcpy.p0.p0.i64
568 // CHECK: call i32 @f42_stack(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6, i32 noundef 7, i32 noundef 8, i32 noundef 9, ptr noundef %[[a]], ptr noundef %[[b]])
569 return f42_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g42
, g42_2
);
572 // structs with size > 16 bytes, with alignment attribute
581 } __attribute__((aligned(16)));
582 typedef struct s43 s43_with_align
;
583 // passing aligned structs in registers
584 __attribute__ ((noinline
))
585 int f43(int i
, s43_with_align s1
, s43_with_align s2
) {
586 // CHECK: define{{.*}} i32 @f43(i32 noundef %i, ptr noundef %s1, ptr noundef %s2)
587 // CHECK: getelementptr inbounds %struct.s43, ptr %s1, i32 0, i32 0
588 // CHECK: getelementptr inbounds %struct.s43, ptr %s2, i32 0, i32 0
589 // CHECK: getelementptr inbounds %struct.s43, ptr %s1, i32 0, i32 1
590 // CHECK: getelementptr inbounds %struct.s43, ptr %s2, i32 0, i32 1
591 return s1
.i
+ s2
.i
+ i
+ s1
.s
+ s2
.s
;
594 s43_with_align g43_2
;
596 // CHECK: define{{.*}} i32 @caller43()
597 // CHECK: %[[a:.*]] = alloca %struct.s43, align 16
598 // CHECK: %[[b:.*]] = alloca %struct.s43, align 16
599 // CHECK: call void @llvm.memcpy.p0.p0.i64
600 // CHECK: call void @llvm.memcpy.p0.p0.i64
601 // CHECK: call i32 @f43(i32 noundef 3, ptr noundef %[[a]], ptr noundef %[[b]])
602 return f43(3, g43
, g43_2
);
604 // passing aligned structs on stack
605 __attribute__ ((noinline
))
606 int f43_stack(int i
, int i2
, int i3
, int i4
, int i5
, int i6
, int i7
, int i8
,
607 int i9
, s43_with_align s1
, s43_with_align s2
) {
608 // CHECK: define{{.*}} i32 @f43_stack(i32 noundef %i, i32 noundef %i2, i32 noundef %i3, i32 noundef %i4, i32 noundef %i5, i32 noundef %i6, i32 noundef %i7, i32 noundef %i8, i32 noundef %i9, ptr noundef %s1, ptr noundef %s2)
609 // CHECK: getelementptr inbounds %struct.s43, ptr %s1, i32 0, i32 0
610 // CHECK: getelementptr inbounds %struct.s43, ptr %s2, i32 0, i32 0
611 // CHECK: getelementptr inbounds %struct.s43, ptr %s1, i32 0, i32 1
612 // CHECK: getelementptr inbounds %struct.s43, ptr %s2, i32 0, i32 1
613 return s1
.i
+ s2
.i
+ i
+ i2
+ i3
+ i4
+ i5
+ i6
+ i7
+ i8
+ i9
+ s1
.s
+ s2
.s
;
615 int caller43_stack() {
616 // CHECK: define{{.*}} i32 @caller43_stack()
617 // CHECK: %[[a:.*]] = alloca %struct.s43, align 16
618 // CHECK: %[[b:.*]] = alloca %struct.s43, align 16
619 // CHECK: call void @llvm.memcpy.p0.p0.i64
620 // CHECK: call void @llvm.memcpy.p0.p0.i64
621 // CHECK: call i32 @f43_stack(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6, i32 noundef 7, i32 noundef 8, i32 noundef 9, ptr noundef %[[a]], ptr noundef %[[b]])
622 return f43_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g43
, g43_2
);
625 // We should not split argument s1 between registers and stack.
626 __attribute__ ((noinline
))
627 int f40_split(int i
, int i2
, int i3
, int i4
, int i5
, int i6
, int i7
,
628 s40_no_align s1
, s40_no_align s2
) {
629 // CHECK: define{{.*}} i32 @f40_split(i32 noundef %i, i32 noundef %i2, i32 noundef %i3, i32 noundef %i4, i32 noundef %i5, i32 noundef %i6, i32 noundef %i7, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
630 return s1
.i
+ s2
.i
+ i
+ i2
+ i3
+ i4
+ i5
+ i6
+ i7
+ s1
.s
+ s2
.s
;
632 int caller40_split() {
633 // CHECK: define{{.*}} i32 @caller40_split()
634 // CHECK: call i32 @f40_split(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6, i32 noundef 7, [2 x i64] %{{.*}} [2 x i64] %{{.*}})
635 return f40_split(1, 2, 3, 4, 5, 6, 7, g40
, g40_2
);
638 __attribute__ ((noinline
))
639 int f41_split(int i
, int i2
, int i3
, int i4
, int i5
, int i6
, int i7
,
640 s41_with_align s1
, s41_with_align s2
) {
641 // CHECK: define{{.*}} i32 @f41_split(i32 noundef %i, i32 noundef %i2, i32 noundef %i3, i32 noundef %i4, i32 noundef %i5, i32 noundef %i6, i32 noundef %i7, i128 %s1.coerce, i128 %s2.coerce)
642 return s1
.i
+ s2
.i
+ i
+ i2
+ i3
+ i4
+ i5
+ i6
+ i7
+ s1
.s
+ s2
.s
;
644 int caller41_split() {
645 // CHECK: define{{.*}} i32 @caller41_split()
646 // CHECK: call i32 @f41_split(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6, i32 noundef 7, i128 %{{.*}}, i128 %{{.*}})
647 return f41_split(1, 2, 3, 4, 5, 6, 7, g41
, g41_2
);
650 // Handle homogeneous aggregates properly in variadic functions.
655 float test_hfa(int n
, ...) {
656 // CHECK-LE-LABEL: define{{.*}} float @test_hfa(i32 noundef %n, ...)
657 // CHECK-LE: [[THELIST:%.*]] = alloca ptr
658 // CHECK-LE: [[CURLIST:%.*]] = load ptr, ptr [[THELIST]]
660 // HFA is not indirect, so occupies its full 16 bytes on the stack.
661 // CHECK-LE: [[NEXTLIST:%.*]] = getelementptr inbounds i8, ptr [[CURLIST]], i64 16
662 // CHECK-LE: store ptr [[NEXTLIST]], ptr [[THELIST]]
664 __builtin_va_list thelist
;
665 __builtin_va_start(thelist
, n
);
666 struct HFA h
= __builtin_va_arg(thelist
, struct HFA
);
670 float test_hfa_call(struct HFA
*a
) {
671 // CHECK-LABEL: define{{.*}} float @test_hfa_call(ptr noundef %a)
672 // CHECK: call float (i32, ...) @test_hfa(i32 noundef 1, [4 x float] {{.*}})
680 float test_toobig_hfa(int n
, ...) {
681 // CHECK-LE-LABEL: define{{.*}} float @test_toobig_hfa(i32 noundef %n, ...)
682 // CHECK-LE: [[THELIST:%.*]] = alloca ptr
683 // CHECK-LE: [[CURLIST:%.*]] = load ptr, ptr [[THELIST]]
685 // TooBigHFA is not actually an HFA, so gets passed indirectly. Only 8 bytes
686 // of stack consumed.
687 // CHECK-LE: [[NEXTLIST:%.*]] = getelementptr inbounds i8, ptr [[CURLIST]], i64 8
688 // CHECK-LE: store ptr [[NEXTLIST]], ptr [[THELIST]]
690 // CHECK-LE: [[HFAPTR:%.*]] = load ptr, ptr [[CURLIST]]
691 __builtin_va_list thelist
;
692 __builtin_va_start(thelist
, n
);
693 struct TooBigHFA h
= __builtin_va_arg(thelist
, struct TooBigHFA
);
701 int32x4_t
test_hva(int n
, ...) {
702 // CHECK-LE-LABEL: define{{.*}} <4 x i32> @test_hva(i32 noundef %n, ...)
703 // CHECK-LE: [[THELIST:%.*]] = alloca ptr
704 // CHECK-LE: [[CURLIST:%.*]] = load ptr, ptr [[THELIST]]
706 // HVA is not indirect, so occupies its full 32 bytes on the stack, but it
707 // must be properly aligned.
708 // CHECK-LE: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[CURLIST]], i32 15
709 // CHECK-LE: [[ALIGNED_LIST:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[GEP]], i64 -16)
711 // CHECK-LE: [[NEXTLIST:%.*]] = getelementptr inbounds i8, ptr [[ALIGNED_LIST]], i64 32
712 // CHECK-LE: store ptr [[NEXTLIST]], ptr [[THELIST]]
714 __builtin_va_list thelist
;
715 __builtin_va_start(thelist
, n
);
716 struct HVA h
= __builtin_va_arg(thelist
, struct HVA
);
721 int32x4_t a
, b
, c
, d
, e
;
724 int32x4_t
test_toobig_hva(int n
, ...) {
725 // CHECK-LE-LABEL: define{{.*}} <4 x i32> @test_toobig_hva(i32 noundef %n, ...)
726 // CHECK-LE: [[THELIST:%.*]] = alloca ptr
727 // CHECK-LE: [[CURLIST:%.*]] = load ptr, ptr [[THELIST]]
729 // TooBigHVA is not actually an HVA, so gets passed indirectly. Only 8 bytes
730 // of stack consumed.
731 // CHECK-LE: [[NEXTLIST:%.*]] = getelementptr inbounds i8, ptr [[CURLIST]], i64 8
732 // CHECK-LE: store ptr [[NEXTLIST]], ptr [[THELIST]]
734 // CHECK-LE: [[HVAPTR:%.*]] = load ptr, ptr [[CURLIST]]
735 __builtin_va_list thelist
;
736 __builtin_va_start(thelist
, n
);
737 struct TooBigHVA h
= __builtin_va_arg(thelist
, struct TooBigHVA
);
// Three-element float extended vector type.
741 typedef __attribute__((__ext_vector_type__(3))) float float32x3_t
;
// Aggregate of four float32x3_t vectors, consumed through va_arg by
// test_hva_v3 and passed by value from test_hva_v3_call. The call expectation
// further down shows it lowered as [4 x <4 x float>].
742 typedef struct { float32x3_t arr
[4]; } HFAv3
;
744 float32x3_t
test_hva_v3(int n
, ...) {
745 // CHECK-LE-LABEL: define{{.*}} <3 x float> @test_hva_v3(i32 noundef %n, ...)
746 // CHECK-LE: [[THELIST:%.*]] = alloca ptr
747 // CHECK-LE: [[CURLIST:%.*]] = load ptr, ptr [[THELIST]]
749 // HVA is not indirect, so occupies its full 64 bytes on the stack, but it
750 // must be properly aligned.
752 // CHECK-LE: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[CURLIST]], i32 15
753 // CHECK-LE: [[ALIGNED_LIST:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[GEP]], i64 -16)
754 // CHECK-LE: [[NEXTLIST:%.*]] = getelementptr inbounds i8, ptr [[ALIGNED_LIST]], i64 64
755 // CHECK-LE: store ptr [[NEXTLIST]], ptr [[THELIST]]
758 __builtin_va_start(l
, n
);
759 HFAv3 r
= __builtin_va_arg(l
, HFAv3
);
763 float32x3_t
test_hva_v3_call(HFAv3
*a
) {
764 // CHECK-LABEL: define{{.*}} <3 x float> @test_hva_v3_call(ptr noundef %a)
765 // CHECK: call <3 x float> (i32, ...) @test_hva_v3(i32 noundef 1, [4 x <4 x float>] {{.*}})
766 return test_hva_v3(1, *a
);