clang/test/CodeGen/arm64-arguments.c

   1 // RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -target-abi darwinpcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
   2 // RUN: %clang_cc1 -triple aarch64_be-none-linux-gnu -target-feature +neon -target-abi darwinpcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BE
   3
   4 // REQUIRES: aarch64-registered-target || arm-registered-target
   5
   6 // CHECK: define{{.*}} signext i8 @f0()
   7 char f0(void) {
   8   return 0;
   9 }
  10
  11 // Struct as return type. Aggregates <= 16 bytes are passed directly. For BE,
  12 // return values are rounded up to 64 bits.
  13 //
  14 // CHECK-LE: define{{.*}} i8 @f1()
  15 // CHECK-BE: define{{.*}} i64 @f1()
  16 struct s1 { char f0; };
  17 struct s1 f1(void) {}
  18
  19 // CHECK-LE: define{{.*}} i16 @f2()
  20 // CHECK-BE: define{{.*}} i64 @f2()
  21 struct s2 { short f0; };
  22 struct s2 f2(void) {}
  23
  24 // CHECK-LE: define{{.*}} i32 @f3()
  25 // CHECK-BE: define{{.*}} i64 @f3()
  26 struct s3 { int f0; };
  27 struct s3 f3(void) {}
  28
  29 // CHECK-LE: define{{.*}} i32 @f4()
  30 // CHECK-BE: define{{.*}} i64 @f4()
  31 struct s4 { struct s4_0 { int f0; } f0; };
  32 struct s4 f4(void) {}
  33
  34 // CHECK-LE: define{{.*}} i32 @f5()
  35 // CHECK-BE: define{{.*}} i64 @f5()
  36 struct s5 { struct { } f0; int f1; };
  37 struct s5 f5(void) {}
  38
  39 // CHECK-LE: define{{.*}} i32 @f6()
  40 // CHECK-BE: define{{.*}} i64 @f6()
  41 struct s6 { int f0[1]; };
  42 struct s6 f6(void) {}
  43
  44 // CHECK: define{{.*}} void @f7()
  45 struct s7 { struct { int : 0; } f0; };
  46 struct s7 f7(void) {}
  47
  48 // CHECK: define{{.*}} void @f8()
  49 struct s8 { struct { int : 0; } f0[1]; };
  50 struct s8 f8(void) {}
  51
  52 // CHECK-LE: define{{.*}} i32 @f9()
  53 // CHECK-BE: define{{.*}} i64 @f9()
  54 struct s9 { int f0; int : 0; };
  55 struct s9 f9(void) {}
  56
  57 // CHECK-LE: define{{.*}} i32 @f10()
  58 // CHECK-BE: define{{.*}} i64 @f10()
  59 struct s10 { int f0; int : 0; int : 0; };
  60 struct s10 f10(void) {}
  61
  62 // CHECK-LE: define{{.*}} i32 @f11()
  63 // CHECK-BE: define{{.*}} i64 @f11()
  64 struct s11 { int : 0; int f0; };
  65 struct s11 f11(void) {}
  66
  67 // CHECK-LE: define{{.*}} i24 @f11_packed()
  68 // CHECK-BE: define{{.*}} i64 @f11_packed()
  69 struct s11_packed { char c; short s; } __attribute__((packed)); // packed: char + short with no padding (i24 on LE)
  70 struct s11_packed f11_packed(void) { }
  71
  72 // CHECK-LE: define{{.*}} i32 @f11_not_packed()
  73 // CHECK-BE: define{{.*}} i64 @f11_not_packed()
  74 struct s11_not_packed { char c; short s; };
  75 struct s11_not_packed f11_not_packed(void) { }
  76
  77 // CHECK-LE: define{{.*}} i32 @f12()
  78 // CHECK-BE: define{{.*}} i64 @f12()
  79 union u12 { char f0; short f1; int f2; };
  80 union u12 f12(void) {}
  81
  82 // Homogeneous Aggregate as return type will be passed directly.
  83 // CHECK: define{{.*}} %struct.s13 @f13()
  84 struct s13 { float f0; };
  85 struct s13 f13(void) {}
  86 // CHECK: define{{.*}} %union.u14 @f14()
  87 union u14 { float f0; };
  88 union u14 f14(void) {}
  89
  90 // CHECK: define{{.*}} void @f15()
  91 void f15(struct s7 a0) {}
  92
  93 // CHECK: define{{.*}} void @f16()
  94 void f16(struct s8 a0) {}
  95
  96 // CHECK-LE: define{{.*}} i32 @f17()
  97 // CHECK-BE: define{{.*}} i64 @f17()
  98 struct s17 { short f0 : 13; char f1 : 4; };
  99 struct s17 f17(void) {}
 100
 101 // CHECK-LE: define{{.*}} i32 @f18()
 102 // CHECK-BE: define{{.*}} i64 @f18()
 103 struct s18 { short f0; char f1 : 4; };
 104 struct s18 f18(void) {}
 105
 106 // CHECK-LE: define{{.*}} i32 @f19()
 107 // CHECK-BE: define{{.*}} i64 @f19()
 108 struct s19 { int f0; struct s8 f1; };
 109 struct s19 f19(void) {}
 110
 111 // CHECK-LE: define{{.*}} i32 @f20()
 112 // CHECK-BE: define{{.*}} i64 @f20()
 113 struct s20 { struct s8 f1; int f0; };
 114 struct s20 f20(void) {}
 115
 116 // CHECK-LE: define{{.*}} i32 @f21()
 117 // CHECK-BE: define{{.*}} i64 @f21()
 118 struct s21 { struct {} f1; int f0 : 4; };
 119 struct s21 f21(void) {}
 120
 121 // CHECK-LE: define{{.*}} i16 @f22()
 122 // CHECK-LE: define{{.*}} i32 @f23()
 123 // CHECK-BE: define{{.*}} i64 @f22()
 124 // CHECK-BE: define{{.*}} i64 @f23()
 125 // CHECK: define{{.*}} i64 @f24()
 126 // CHECK: define{{.*}} [2 x i64] @f25()
 127 // CHECK: define{{.*}} { float, float } @f26()
 128 // CHECK: define{{.*}} { double, double } @f27()
 129 _Complex char       f22(void) {}
 130 _Complex short      f23(void) {}
 131 _Complex int        f24(void) {}
 132 _Complex long long  f25(void) {}
 133 _Complex float      f26(void) {}
 134 _Complex double     f27(void) {}
 135
 136 // CHECK-LE: define{{.*}} i16 @f28()
 137 // CHECK-BE: define{{.*}} i64 @f28()
 138 struct s28 { _Complex char f0; }; // single-member wrapper around _Complex char
 139 struct s28 f28(void) {} // prototype form, consistent with f0..f27
 140
 141 // CHECK-LE: define{{.*}} i32 @f29()
 142 // CHECK-BE: define{{.*}} i64 @f29()
 143 struct s29 { _Complex short f0; }; // single-member wrapper around _Complex short
 144 struct s29 f29(void) {} // prototype form, consistent with f0..f27
 145
 146 // CHECK: define{{.*}} i64 @f30()
 147 struct s30 { _Complex int f0; }; // single-member wrapper around _Complex int
 148 struct s30 f30(void) {} // prototype form, consistent with f0..f27
 149
 150 struct s31 { char x; };
 151 void f31(struct s31 s) { }
 152 // CHECK: define{{.*}} void @f31(i64 %s.coerce)
 153 // CHECK: %s = alloca %struct.s31, align 1
 154 // CHECK-BE: %coerce.highbits = lshr i64 %s.coerce, 56
 155 // CHECK-BE: trunc i64 %coerce.highbits to i8
 156 // CHECK-LE: trunc i64 %s.coerce to i8
 157 // CHECK: store i8 %{{.*}},
 158
 159 struct s32 { double x; };
 160 void f32(struct s32 s) { }
 161 // CHECK: @f32([1 x double] %{{.*}})
 162
 163 // A composite type larger than 16 bytes should be passed indirectly.
 164 struct s33 { char buf[32*32]; };
 165 void f33(struct s33 s) { }
 166 // CHECK: define{{.*}} void @f33(ptr noundef %s)
 167
 168 struct s34 { char c; };
 169 void f34(struct s34 s);
 170 void g34(struct s34 *s) { f34(*s); }
 171 // CHECK: @g34(ptr noundef %s)
 172 // CHECK: %[[a:.*]] = load i8, ptr %{{.*}}
 173 // CHECK: zext i8 %[[a]] to i64
 174 // CHECK: call void @f34(i64 %{{.*}})
 175
 176 /*
 177  * Check that va_arg accesses stack according to ABI alignment
 178  */
 179 long long t1(int i, ...) {
 180     // CHECK: t1
 181     __builtin_va_list ap;
 182     __builtin_va_start(ap, i);
 183     // CHECK-NOT: add i32 %{{.*}} 7
 184     // CHECK-NOT: and i32 %{{.*}} -8
 185     long long ll = __builtin_va_arg(ap, long long);
 186     __builtin_va_end(ap);
 187     return ll;
 188 }
 189 double t2(int i, ...) {
 190     // CHECK: t2
 191     __builtin_va_list ap;
 192     __builtin_va_start(ap, i);
 193     // CHECK-NOT: add i32 %{{.*}} 7
 194     // CHECK-NOT: and i32 %{{.*}} -8
 195     double ll = __builtin_va_arg(ap, double);
 196     __builtin_va_end(ap);
 197     return ll;
 198 }
 199 _Bool t3(int i, ...) {
 200   // CHECK: t3
 201   __builtin_va_list ap;
 202   __builtin_va_start(ap, i);
 203   // CHECK:      %0 = va_arg ptr %ap, i8
 204   // CHECK-NEXT: store i8 %0, ptr %varet, align 1
 205   _Bool b = __builtin_va_arg(ap, _Bool);
 206   __builtin_va_end(ap);
 207   return b;
 208 }
 209
 210 #include <arm_neon.h>
 211
 212 // Homogeneous Vector Aggregate as return type and argument type.
 213 // CHECK: define{{.*}} %struct.int8x16x2_t @f0_0(<16 x i8> noundef %{{.*}}, <16 x i8> noundef %{{.*}})
 214 int8x16x2_t f0_0(int8x16_t a0, int8x16_t a1) {
 215   return vzipq_s8(a0, a1);
 216 }
 217
 218 // Test direct vector passing.
 219 typedef float T_float32x2 __attribute__ ((__vector_size__ (8)));
 220 typedef float T_float32x4 __attribute__ ((__vector_size__ (16)));
 221 typedef float T_float32x8 __attribute__ ((__vector_size__ (32)));
 222 typedef float T_float32x16 __attribute__ ((__vector_size__ (64)));
 223
 224 // CHECK: define{{.*}} <2 x float> @f1_0(<2 x float> noundef %{{.*}})
 225 T_float32x2 f1_0(T_float32x2 a0) { return a0; }
 226 // CHECK: define{{.*}} <4 x float> @f1_1(<4 x float> noundef %{{.*}})
 227 T_float32x4 f1_1(T_float32x4 a0) { return a0; }
 228 // Vectors larger than 16 bytes are illegal and are passed indirectly.
 229 // CHECK: define{{.*}} void @f1_2(ptr dead_on_unwind noalias writable sret(<8 x float>) align 16 %{{.*}}, ptr noundef %0)
 230 T_float32x8 f1_2(T_float32x8 a0) { return a0; }
 231 // CHECK: define{{.*}} void @f1_3(ptr dead_on_unwind noalias writable sret(<16 x float>) align 16 %{{.*}}, ptr noundef %0)
 232 T_float32x16 f1_3(T_float32x16 a0) { return a0; }
 233
 234 // Testing alignment with aggregates: HFA, aggregates with size <= 16 bytes and
 235 // aggregates with size > 16 bytes.
 236 struct s35
 237 {
 238    float v[4]; //Testing HFA.
 239 } __attribute__((aligned(16)));
 240 typedef struct s35 s35_with_align;
 241
 242 typedef __attribute__((neon_vector_type(4))) float float32x4_t;
 243 float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) {
 244 // CHECK: define{{.*}} <4 x float> @f35(i32 noundef %i, [4 x float] %s1.coerce, [4 x float] %s2.coerce)
 245 // CHECK: %s1 = alloca %struct.s35, align 16
 246 // CHECK: %s2 = alloca %struct.s35, align 16
 247 // CHECK: load <4 x float>, ptr %s1, align 16
 248 // CHECK: load <4 x float>, ptr %s2, align 16
 249   float32x4_t v = vaddq_f32(*(float32x4_t *)&s1,
 250                             *(float32x4_t *)&s2);
 251   return v;
 252 }
 253
 254 struct s36
 255 {
 256    int v[4]; //Testing 16-byte aggregate.
 257 } __attribute__((aligned(16)));
 258 typedef struct s36 s36_with_align;
 259
 260 typedef __attribute__((neon_vector_type(4))) int int32x4_t;
 261 int32x4_t f36(int i, s36_with_align s1, s36_with_align s2) {
 262 // CHECK: define{{.*}} <4 x i32> @f36(i32 noundef %i, i128 %s1.coerce, i128 %s2.coerce)
 263 // CHECK: %s1 = alloca %struct.s36, align 16
 264 // CHECK: %s2 = alloca %struct.s36, align 16
 265 // CHECK: store i128 %s1.coerce, ptr %{{.*}}, align 16
 266 // CHECK: store i128 %s2.coerce, ptr %{{.*}}, align 16
 267 // CHECK: load <4 x i32>, ptr %s1, align 16
 268 // CHECK: load <4 x i32>, ptr %s2, align 16
 269   int32x4_t v = vaddq_s32(*(int32x4_t *)&s1,
 270                           *(int32x4_t *)&s2);
 271   return v;
 272 }
 273
 274 struct s37
 275 {
 276    int v[18]; //Testing large aggregate.
 277 } __attribute__((aligned(16)));
 278 typedef struct s37 s37_with_align;
 279
 280 int32x4_t f37(int i, s37_with_align s1, s37_with_align s2) {
 281 // CHECK: define{{.*}} <4 x i32> @f37(i32 noundef %i, ptr noundef %s1, ptr noundef %s2)
 282 // CHECK: load <4 x i32>, ptr %s1, align 16
 283 // CHECK: load <4 x i32>, ptr %s2, align 16
 284   int32x4_t v = vaddq_s32(*(int32x4_t *)&s1,
 285                           *(int32x4_t *)&s2);
 286   return v;
 287 }
 288 s37_with_align g37;
 289 int32x4_t caller37(void) { // Caller side of f37: g37 is copied into two temporaries that are passed by pointer.
 290 // CHECK: caller37
 291 // CHECK: %[[a:.*]] = alloca %struct.s37, align 16
 292 // CHECK: %[[b:.*]] = alloca %struct.s37, align 16
 293 // CHECK: call void @llvm.memcpy
 294 // CHECK: call void @llvm.memcpy
 295 // CHECK: call <4 x i32> @f37(i32 noundef 3, ptr noundef %[[a]], ptr noundef %[[b]])
 296   return f37(3, g37, g37);
 297 }
 298
 299 // Test passing structs with size <= 8, <= 16 and > 16 bytes,
 300 // with and without an alignment of 16.
 301
 302 // structs with size <= 8 bytes, without alignment attribute:
 303 // passed as i64 (the aligned(16) variant below is passed as i128)
 304 struct s38
 305 {
 306   int i;
 307   short s;
 308 };
 309 typedef struct s38 s38_no_align;
 310 // passing structs in registers
 311 __attribute__ ((noinline))
 312 int f38(int i, s38_no_align s1, s38_no_align s2) {
 313 // CHECK: define{{.*}} i32 @f38(i32 noundef %i, i64 %s1.coerce, i64 %s2.coerce)
 314 // CHECK: %s1 = alloca %struct.s38, align 4
 315 // CHECK: %s2 = alloca %struct.s38, align 4
 316 // CHECK: store i64 %s1.coerce, ptr %{{.*}}, align 4
 317 // CHECK: store i64 %s2.coerce, ptr %{{.*}}, align 4
 318 // CHECK: getelementptr inbounds nuw %struct.s38, ptr %s1, i32 0, i32 0
 319 // CHECK: getelementptr inbounds nuw %struct.s38, ptr %s2, i32 0, i32 0
 320 // CHECK: getelementptr inbounds nuw %struct.s38, ptr %s1, i32 0, i32 1
 321 // CHECK: getelementptr inbounds nuw %struct.s38, ptr %s2, i32 0, i32 1
 322   return s1.i + s2.i + i + s1.s + s2.s;
 323 }
 324 s38_no_align g38;
 325 s38_no_align g38_2;
 326 int caller38(void) { // Caller side of f38: each global is loaded as one i64 and passed by value.
 327 // CHECK: define{{.*}} i32 @caller38()
 328 // CHECK: %[[a:.*]] = load i64, ptr @g38, align 4
 329 // CHECK: %[[b:.*]] = load i64, ptr @g38_2, align 4
 330 // CHECK: call i32 @f38(i32 noundef 3, i64 %[[a]], i64 %[[b]])
 331   return f38(3, g38, g38_2);
 332 }
 333 // passing structs on stack
 334 __attribute__ ((noinline))
 335 int f38_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
 336               int i9, s38_no_align s1, s38_no_align s2) {
 337 // CHECK: define{{.*}} i32 @f38_stack(i32 noundef %i, i32 noundef %i2, i32 noundef %i3, i32 noundef %i4, i32 noundef %i5, i32 noundef %i6, i32 noundef %i7, i32 noundef %i8, i32 noundef %i9, i64 %s1.coerce, i64 %s2.coerce)
 338 // CHECK: %s1 = alloca %struct.s38, align 4
 339 // CHECK: %s2 = alloca %struct.s38, align 4
 340 // CHECK: store i64 %s1.coerce, ptr %{{.*}}, align 4
 341 // CHECK: store i64 %s2.coerce, ptr %{{.*}}, align 4
 342 // CHECK: getelementptr inbounds nuw %struct.s38, ptr %s1, i32 0, i32 0
 343 // CHECK: getelementptr inbounds nuw %struct.s38, ptr %s2, i32 0, i32 0
 344 // CHECK: getelementptr inbounds nuw %struct.s38, ptr %s1, i32 0, i32 1
 345 // CHECK: getelementptr inbounds nuw %struct.s38, ptr %s2, i32 0, i32 1
 346   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
 347 }
 348 int caller38_stack(void) { // Caller side of f38_stack: nine ints first, then both structs as i64.
 349 // CHECK: define{{.*}} i32 @caller38_stack()
 350 // CHECK: %[[a:.*]] = load i64, ptr @g38, align 4
 351 // CHECK: %[[b:.*]] = load i64, ptr @g38_2, align 4
 352 // CHECK: call i32 @f38_stack(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6, i32 noundef 7, i32 noundef 8, i32 noundef 9, i64 %[[a]], i64 %[[b]])
 353   return f38_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g38, g38_2);
 354 }
 355
 356 // structs with size <= 8 bytes, with alignment attribute
 357 struct s39
 358 {
 359   int i;
 360   short s;
 361 } __attribute__((aligned(16)));
 362 typedef struct s39 s39_with_align;
 363 // passing aligned structs in registers
 364 __attribute__ ((noinline))
 365 int f39(int i, s39_with_align s1, s39_with_align s2) {
 366 // CHECK: define{{.*}} i32 @f39(i32 noundef %i, i128 %s1.coerce, i128 %s2.coerce)
 367 // CHECK: %s1 = alloca %struct.s39, align 16
 368 // CHECK: %s2 = alloca %struct.s39, align 16
 369 // CHECK: store i128 %s1.coerce, ptr %{{.*}}, align 16
 370 // CHECK: store i128 %s2.coerce, ptr %{{.*}}, align 16
 371 // CHECK: getelementptr inbounds nuw %struct.s39, ptr %s1, i32 0, i32 0
 372 // CHECK: getelementptr inbounds nuw %struct.s39, ptr %s2, i32 0, i32 0
 373 // CHECK: getelementptr inbounds nuw %struct.s39, ptr %s1, i32 0, i32 1
 374 // CHECK: getelementptr inbounds nuw %struct.s39, ptr %s2, i32 0, i32 1
 375   return s1.i + s2.i + i + s1.s + s2.s;
 376 }
 377 s39_with_align g39;
 378 s39_with_align g39_2;
 379 int caller39(void) { // Caller side of f39: each 16-byte-aligned global is loaded as one i128.
 380 // CHECK: define{{.*}} i32 @caller39()
 381 // CHECK: %[[a:.*]] = load i128, ptr @g39, align 16
 382 // CHECK: %[[b:.*]] = load i128, ptr @g39_2, align 16
 383 // CHECK: call i32 @f39(i32 noundef 3, i128 %[[a]], i128 %[[b]])
 384   return f39(3, g39, g39_2);
 385 }
 386 // passing aligned structs on stack
 387 __attribute__ ((noinline))
 388 int f39_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
 389               int i9, s39_with_align s1, s39_with_align s2) {
 390 // CHECK: define{{.*}} i32 @f39_stack(i32 noundef %i, i32 noundef %i2, i32 noundef %i3, i32 noundef %i4, i32 noundef %i5, i32 noundef %i6, i32 noundef %i7, i32 noundef %i8, i32 noundef %i9, i128 %s1.coerce, i128 %s2.coerce)
 391 // CHECK: %s1 = alloca %struct.s39, align 16
 392 // CHECK: %s2 = alloca %struct.s39, align 16
 393 // CHECK: store i128 %s1.coerce, ptr %{{.*}}, align 16
 394 // CHECK: store i128 %s2.coerce, ptr %{{.*}}, align 16
 395 // CHECK: getelementptr inbounds nuw %struct.s39, ptr %s1, i32 0, i32 0
 396 // CHECK: getelementptr inbounds nuw %struct.s39, ptr %s2, i32 0, i32 0
 397 // CHECK: getelementptr inbounds nuw %struct.s39, ptr %s1, i32 0, i32 1
 398 // CHECK: getelementptr inbounds nuw %struct.s39, ptr %s2, i32 0, i32 1
 399   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
 400 }
 401 int caller39_stack(void) { // Caller side of f39_stack: nine ints first, then both structs as i128.
 402 // CHECK: define{{.*}} i32 @caller39_stack()
 403 // CHECK: %[[a:.*]] = load i128, ptr @g39, align 16
 404 // CHECK: %[[b:.*]] = load i128, ptr @g39_2, align 16
 405 // CHECK: call i32 @f39_stack(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6, i32 noundef 7, i32 noundef 8, i32 noundef 9, i128 %[[a]], i128 %[[b]])
 406   return f39_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g39, g39_2);
 407 }
 408
 409 // structs with size <= 16 bytes, without alignment attribute
 410 struct s40
 411 {
 412   int i;
 413   short s;
 414   int i2;
 415   short s2;
 416 };
 417 typedef struct s40 s40_no_align;
 418 // passing structs in registers
 419 __attribute__ ((noinline))
 420 int f40(int i, s40_no_align s1, s40_no_align s2) {
 421 // CHECK: define{{.*}} i32 @f40(i32 noundef %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
 422 // CHECK: %s1 = alloca %struct.s40, align 4
 423 // CHECK: %s2 = alloca %struct.s40, align 4
 424 // CHECK: store [2 x i64] %s1.coerce, ptr %{{.*}}, align 4
 425 // CHECK: store [2 x i64] %s2.coerce, ptr %{{.*}}, align 4
 426 // CHECK: getelementptr inbounds nuw %struct.s40, ptr %s1, i32 0, i32 0
 427 // CHECK: getelementptr inbounds nuw %struct.s40, ptr %s2, i32 0, i32 0
 428 // CHECK: getelementptr inbounds nuw %struct.s40, ptr %s1, i32 0, i32 1
 429 // CHECK: getelementptr inbounds nuw %struct.s40, ptr %s2, i32 0, i32 1
 430   return s1.i + s2.i + i + s1.s + s2.s;
 431 }
 432 s40_no_align g40;
 433 s40_no_align g40_2;
 434 int caller40(void) { // Caller side of f40: each global is loaded as [2 x i64] and passed by value.
 435 // CHECK: define{{.*}} i32 @caller40()
 436 // CHECK: %[[a:.*]] = load [2 x i64], ptr @g40, align 4
 437 // CHECK: %[[b:.*]] = load [2 x i64], ptr @g40_2, align 4
 438 // CHECK: call i32 @f40(i32 noundef 3, [2 x i64] %[[a]], [2 x i64] %[[b]])
 439   return f40(3, g40, g40_2);
 440 }
 441 // passing structs on stack
 442 __attribute__ ((noinline))
 443 int f40_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
 444               int i9, s40_no_align s1, s40_no_align s2) {
 445 // CHECK: define{{.*}} i32 @f40_stack(i32 noundef %i, i32 noundef %i2, i32 noundef %i3, i32 noundef %i4, i32 noundef %i5, i32 noundef %i6, i32 noundef %i7, i32 noundef %i8, i32 noundef %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
 446 // CHECK: %s1 = alloca %struct.s40, align 4
 447 // CHECK: %s2 = alloca %struct.s40, align 4
 448 // CHECK: store [2 x i64] %s1.coerce, ptr %{{.*}}, align 4
 449 // CHECK: store [2 x i64] %s2.coerce, ptr %{{.*}}, align 4
 450 // CHECK: getelementptr inbounds nuw %struct.s40, ptr %s1, i32 0, i32 0
 451 // CHECK: getelementptr inbounds nuw %struct.s40, ptr %s2, i32 0, i32 0
 452 // CHECK: getelementptr inbounds nuw %struct.s40, ptr %s1, i32 0, i32 1
 453 // CHECK: getelementptr inbounds nuw %struct.s40, ptr %s2, i32 0, i32 1
 454   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
 455 }
 456 int caller40_stack(void) { // Caller side of f40_stack: nine ints first, then both structs as [2 x i64].
 457 // CHECK: define{{.*}} i32 @caller40_stack()
 458 // CHECK: %[[a:.*]] = load [2 x i64], ptr @g40, align 4
 459 // CHECK: %[[b:.*]] = load [2 x i64], ptr @g40_2, align 4
 460 // CHECK: call i32 @f40_stack(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6, i32 noundef 7, i32 noundef 8, i32 noundef 9, [2 x i64] %[[a]], [2 x i64] %[[b]])
 461   return f40_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g40, g40_2);
 462 }
 463
 464 // structs with size <= 16 bytes, with alignment attribute
 465 struct s41
 466 {
 467   int i;
 468   short s;
 469   int i2;
 470   short s2;
 471 } __attribute__((aligned(16)));
 472 typedef struct s41 s41_with_align;
 473 // passing aligned structs in registers
 474 __attribute__ ((noinline))
 475 int f41(int i, s41_with_align s1, s41_with_align s2) {
 476 // CHECK: define{{.*}} i32 @f41(i32 noundef %i, i128 %s1.coerce, i128 %s2.coerce)
 477 // CHECK: %s1 = alloca %struct.s41, align 16
 478 // CHECK: %s2 = alloca %struct.s41, align 16
 479 // CHECK: store i128 %s1.coerce, ptr %{{.*}}, align 16
 480 // CHECK: store i128 %s2.coerce, ptr %{{.*}}, align 16
 481 // CHECK: getelementptr inbounds nuw %struct.s41, ptr %s1, i32 0, i32 0
 482 // CHECK: getelementptr inbounds nuw %struct.s41, ptr %s2, i32 0, i32 0
 483 // CHECK: getelementptr inbounds nuw %struct.s41, ptr %s1, i32 0, i32 1
 484 // CHECK: getelementptr inbounds nuw %struct.s41, ptr %s2, i32 0, i32 1
 485   return s1.i + s2.i + i + s1.s + s2.s;
 486 }
 487 s41_with_align g41;
 488 s41_with_align g41_2;
 489 int caller41(void) { // Caller side of f41: each 16-byte-aligned global is loaded as one i128.
 490 // CHECK: define{{.*}} i32 @caller41()
 491 // CHECK: %[[a:.*]] = load i128, ptr @g41, align 16
 492 // CHECK: %[[b:.*]] = load i128, ptr @g41_2, align 16
 493 // CHECK: call i32 @f41(i32 noundef 3, i128 %[[a]], i128 %[[b]])
 494   return f41(3, g41, g41_2);
 495 }
 496 // passing aligned structs on stack
 497 __attribute__ ((noinline))
 498 int f41_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
 499               int i9, s41_with_align s1, s41_with_align s2) {
 500 // CHECK: define{{.*}} i32 @f41_stack(i32 noundef %i, i32 noundef %i2, i32 noundef %i3, i32 noundef %i4, i32 noundef %i5, i32 noundef %i6, i32 noundef %i7, i32 noundef %i8, i32 noundef %i9, i128 %s1.coerce, i128 %s2.coerce)
 501 // CHECK: %s1 = alloca %struct.s41, align 16
 502 // CHECK: %s2 = alloca %struct.s41, align 16
 503 // CHECK: store i128 %s1.coerce, ptr %{{.*}}, align 16
 504 // CHECK: store i128 %s2.coerce, ptr %{{.*}}, align 16
 505 // CHECK: getelementptr inbounds nuw %struct.s41, ptr %s1, i32 0, i32 0
 506 // CHECK: getelementptr inbounds nuw %struct.s41, ptr %s2, i32 0, i32 0
 507 // CHECK: getelementptr inbounds nuw %struct.s41, ptr %s1, i32 0, i32 1
 508 // CHECK: getelementptr inbounds nuw %struct.s41, ptr %s2, i32 0, i32 1
 509   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
 510 }
 511 int caller41_stack(void) { // Caller side of f41_stack: nine ints first, then both structs as i128.
 512 // CHECK: define{{.*}} i32 @caller41_stack()
 513 // CHECK: %[[a:.*]] = load i128, ptr @g41, align 16
 514 // CHECK: %[[b:.*]] = load i128, ptr @g41_2, align 16
 515 // CHECK: call i32 @f41_stack(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6, i32 noundef 7, i32 noundef 8, i32 noundef 9, i128 %[[a]], i128 %[[b]])
 516   return f41_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g41, g41_2);
 517 }
 518
 519 // structs with size > 16 bytes, without alignment attribute
 520 struct s42
 521 {
 522   int i;
 523   short s;
 524   int i2;
 525   short s2;
 526   int i3;
 527   short s3;
 528 };
 529 typedef struct s42 s42_no_align;
 530 // passing structs in registers
 531 __attribute__ ((noinline))
 532 int f42(int i, s42_no_align s1, s42_no_align s2) {
 533 // CHECK: define{{.*}} i32 @f42(i32 noundef %i, ptr noundef %s1, ptr noundef %s2)
 534 // CHECK: getelementptr inbounds nuw %struct.s42, ptr %s1, i32 0, i32 0
 535 // CHECK: getelementptr inbounds nuw %struct.s42, ptr %s2, i32 0, i32 0
 536 // CHECK: getelementptr inbounds nuw %struct.s42, ptr %s1, i32 0, i32 1
 537 // CHECK: getelementptr inbounds nuw %struct.s42, ptr %s2, i32 0, i32 1
 538   return s1.i + s2.i + i + s1.s + s2.s;
 539 }
 540 s42_no_align g42;
 541 s42_no_align g42_2;
 542 int caller42(void) { // Caller side of f42: both globals are copied into temporaries and passed by pointer.
 543 // CHECK: define{{.*}} i32 @caller42()
 544 // CHECK: %[[a:.*]] = alloca %struct.s42, align 4
 545 // CHECK: %[[b:.*]] = alloca %struct.s42, align 4
 546 // CHECK: call void @llvm.memcpy.p0.p0.i64
 547 // CHECK: call void @llvm.memcpy.p0.p0.i64
 548 // CHECK: call i32 @f42(i32 noundef 3, ptr noundef %[[a]], ptr noundef %[[b]])
 549   return f42(3, g42, g42_2);
 550 }
 551 // passing structs on stack
 552 __attribute__ ((noinline))
 553 int f42_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
 554               int i9, s42_no_align s1, s42_no_align s2) {
 555 // CHECK: define{{.*}} i32 @f42_stack(i32 noundef %i, i32 noundef %i2, i32 noundef %i3, i32 noundef %i4, i32 noundef %i5, i32 noundef %i6, i32 noundef %i7, i32 noundef %i8, i32 noundef %i9, ptr noundef %s1, ptr noundef %s2)
 556 // CHECK: getelementptr inbounds nuw %struct.s42, ptr %s1, i32 0, i32 0
 557 // CHECK: getelementptr inbounds nuw %struct.s42, ptr %s2, i32 0, i32 0
 558 // CHECK: getelementptr inbounds nuw %struct.s42, ptr %s1, i32 0, i32 1
 559 // CHECK: getelementptr inbounds nuw %struct.s42, ptr %s2, i32 0, i32 1
 560   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
 561 }
 562 int caller42_stack(void) { // Caller side of f42_stack: nine ints first, then pointers to two struct copies.
 563 // CHECK: define{{.*}} i32 @caller42_stack()
 564 // CHECK: %[[a:.*]] = alloca %struct.s42, align 4
 565 // CHECK: %[[b:.*]] = alloca %struct.s42, align 4
 566 // CHECK: call void @llvm.memcpy.p0.p0.i64
 567 // CHECK: call void @llvm.memcpy.p0.p0.i64
 568 // CHECK: call i32 @f42_stack(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6, i32 noundef 7, i32 noundef 8, i32 noundef 9, ptr noundef %[[a]], ptr noundef %[[b]])
 569   return f42_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g42, g42_2);
 570 }
 571
 572 // structs with size > 16 bytes, with alignment attribute
 573 struct s43
 574 {
 575   int i;
 576   short s;
 577   int i2;
 578   short s2;
 579   int i3;
 580   short s3;
 581 } __attribute__((aligned(16)));
 582 typedef struct s43 s43_with_align;
 583 // passing aligned structs in registers
 584 __attribute__ ((noinline))
 585 int f43(int i, s43_with_align s1, s43_with_align s2) {
 586 // CHECK: define{{.*}} i32 @f43(i32 noundef %i, ptr noundef %s1, ptr noundef %s2)
 587 // CHECK: getelementptr inbounds nuw %struct.s43, ptr %s1, i32 0, i32 0
 588 // CHECK: getelementptr inbounds nuw %struct.s43, ptr %s2, i32 0, i32 0
 589 // CHECK: getelementptr inbounds nuw %struct.s43, ptr %s1, i32 0, i32 1
 590 // CHECK: getelementptr inbounds nuw %struct.s43, ptr %s2, i32 0, i32 1
 591   return s1.i + s2.i + i + s1.s + s2.s;
 592 }
 593 s43_with_align g43;
 594 s43_with_align g43_2;
 595 int caller43(void) { // Caller side of f43: both globals are copied into 16-byte-aligned temporaries and passed by pointer.
 596 // CHECK: define{{.*}} i32 @caller43()
 597 // CHECK: %[[a:.*]] = alloca %struct.s43, align 16
 598 // CHECK: %[[b:.*]] = alloca %struct.s43, align 16
 599 // CHECK: call void @llvm.memcpy.p0.p0.i64
 600 // CHECK: call void @llvm.memcpy.p0.p0.i64
 601 // CHECK: call i32 @f43(i32 noundef 3, ptr noundef %[[a]], ptr noundef %[[b]])
 602   return f43(3, g43, g43_2);
 603 }
 604 // passing aligned structs on stack
 605 __attribute__ ((noinline))
 606 int f43_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
 607               int i9, s43_with_align s1, s43_with_align s2) {
 608 // CHECK: define{{.*}} i32 @f43_stack(i32 noundef %i, i32 noundef %i2, i32 noundef %i3, i32 noundef %i4, i32 noundef %i5, i32 noundef %i6, i32 noundef %i7, i32 noundef %i8, i32 noundef %i9, ptr noundef %s1, ptr noundef %s2)
 609 // CHECK: getelementptr inbounds nuw %struct.s43, ptr %s1, i32 0, i32 0
 610 // CHECK: getelementptr inbounds nuw %struct.s43, ptr %s2, i32 0, i32 0
 611 // CHECK: getelementptr inbounds nuw %struct.s43, ptr %s1, i32 0, i32 1
 612 // CHECK: getelementptr inbounds nuw %struct.s43, ptr %s2, i32 0, i32 1
 613   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
 614 }
 615 int caller43_stack(void) { // Caller side of f43_stack: nine ints first, then pointers to two aligned struct copies.
 616 // CHECK: define{{.*}} i32 @caller43_stack()
 617 // CHECK: %[[a:.*]] = alloca %struct.s43, align 16
 618 // CHECK: %[[b:.*]] = alloca %struct.s43, align 16
 619 // CHECK: call void @llvm.memcpy.p0.p0.i64
 620 // CHECK: call void @llvm.memcpy.p0.p0.i64
 621 // CHECK: call i32 @f43_stack(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6, i32 noundef 7, i32 noundef 8, i32 noundef 9, ptr noundef %[[a]], ptr noundef %[[b]])
 622   return f43_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g43, g43_2);
 623 }
 624
 625 // We should not split argument s1 between registers and stack.
 626 __attribute__ ((noinline))
 627 int f40_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
 628               s40_no_align s1, s40_no_align s2) {
 629 // CHECK: define{{.*}} i32 @f40_split(i32 noundef %i, i32 noundef %i2, i32 noundef %i3, i32 noundef %i4, i32 noundef %i5, i32 noundef %i6, i32 noundef %i7, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
 630   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
 631 }
 632 int caller40_split(void) { // Caller side of f40_split: seven ints first, then both structs as whole [2 x i64] values.
 633 // CHECK: define{{.*}} i32 @caller40_split()
 634 // CHECK: call i32 @f40_split(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6, i32 noundef 7, [2 x i64] %{{.*}}, [2 x i64] %{{.*}})
 635   return f40_split(1, 2, 3, 4, 5, 6, 7, g40, g40_2);
 636 }
 637
 638 __attribute__ ((noinline))
 639 int f41_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
 640               s41_with_align s1, s41_with_align s2) {
 641 // CHECK: define{{.*}} i32 @f41_split(i32 noundef %i, i32 noundef %i2, i32 noundef %i3, i32 noundef %i4, i32 noundef %i5, i32 noundef %i6, i32 noundef %i7, i128 %s1.coerce, i128 %s2.coerce)
 642   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
 643 }
 644 int caller41_split(void) { // Caller side of f41_split: seven ints first, then both aligned structs as i128.
 645 // CHECK: define{{.*}} i32 @caller41_split()
 646 // CHECK: call i32 @f41_split(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6, i32 noundef 7, i128 %{{.*}}, i128 %{{.*}})
 647   return f41_split(1, 2, 3, 4, 5, 6, 7, g41, g41_2);
 648 }
 649
 650 // Handle homogeneous aggregates properly in variadic functions.
 651 struct HFA {
 652   float a, b, c, d;
 653 };
 654
 655 float test_hfa(int n, ...) {
 656 // CHECK-LE-LABEL: define{{.*}} float @test_hfa(i32 noundef %n, ...)
 657 // CHECK-LE: [[THELIST:%.*]] = alloca ptr
 658 // CHECK-LE: [[CURLIST:%.*]] = load ptr, ptr [[THELIST]]
 659
 660   // HFA is not indirect, so occupies its full 16 bytes on the stack.
 661 // CHECK-LE: [[NEXTLIST:%.*]] = getelementptr inbounds i8, ptr [[CURLIST]], i64 16
 662 // CHECK-LE: store ptr [[NEXTLIST]], ptr [[THELIST]]
 663
 664   __builtin_va_list thelist;
 665   __builtin_va_start(thelist, n);
 666   struct HFA h = __builtin_va_arg(thelist, struct HFA);
 667   return h.d;
 668 }
 669
 670 float test_hfa_call(struct HFA *a) { // Passes *a to the variadic test_hfa; the CHECK expects it as [4 x float].
 671 // CHECK-LABEL: define{{.*}} float @test_hfa_call(ptr noundef %a)
 672 // CHECK: call float (i32, ...) @test_hfa(i32 noundef 1, [4 x float] {{.*}})
 673   return test_hfa(1, *a); // return the callee's result instead of falling off the end of a non-void function
 674 }
 675
 676 struct TooBigHFA {
 677   float a, b, c, d, e;
 678 };
 679
 680 float test_toobig_hfa(int n, ...) {
 681 // CHECK-LE-LABEL: define{{.*}} float @test_toobig_hfa(i32 noundef %n, ...)
 682 // CHECK-LE: [[THELIST:%.*]] = alloca ptr
 683 // CHECK-LE: [[CURLIST:%.*]] = load ptr, ptr [[THELIST]]
 684
 685   // TooBigHFA is not actually an HFA, so gets passed indirectly. Only 8 bytes
 686   // of stack consumed.
 687 // CHECK-LE: [[NEXTLIST:%.*]] = getelementptr inbounds i8, ptr [[CURLIST]], i64 8
 688 // CHECK-LE: store ptr [[NEXTLIST]], ptr [[THELIST]]
 689
 690 // CHECK-LE: [[HFAPTR:%.*]] = load ptr, ptr [[CURLIST]]
 691   __builtin_va_list thelist;
 692   __builtin_va_start(thelist, n);
 693   struct TooBigHFA h = __builtin_va_arg(thelist, struct TooBigHFA);
 694   return h.d;
 695 }
 696
 697 struct HVA {
 698   int32x4_t a, b;
 699 };
 700
 701 int32x4_t test_hva(int n, ...) {
 702 // CHECK-LE-LABEL: define{{.*}} <4 x i32> @test_hva(i32 noundef %n, ...)
 703 // CHECK-LE: [[THELIST:%.*]] = alloca ptr
 704 // CHECK-LE: [[CURLIST:%.*]] = load ptr, ptr [[THELIST]]
 705
 706   // HVA is not indirect, so it occupies its full 32 bytes on the stack, but it
 707   // must be properly aligned (to 16 bytes, per the ptrmask below).
 708 // CHECK-LE: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[CURLIST]], i32 15
 709 // CHECK-LE: [[ALIGNED_LIST:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[GEP]], i64 -16)
 710
 711 // CHECK-LE: [[NEXTLIST:%.*]] = getelementptr inbounds i8, ptr [[ALIGNED_LIST]], i64 32
 712 // CHECK-LE: store ptr [[NEXTLIST]], ptr [[THELIST]]
 713
 714   __builtin_va_list thelist;
 715   __builtin_va_start(thelist, n);
 716   struct HVA h = __builtin_va_arg(thelist, struct HVA);
 717   return h.b;
 718 }
 719
 720 struct TooBigHVA {
 721   int32x4_t a, b, c, d, e;
 722 };
 723
 724 int32x4_t test_toobig_hva(int n, ...) {
 725 // CHECK-LE-LABEL: define{{.*}} <4 x i32> @test_toobig_hva(i32 noundef %n, ...)
 726 // CHECK-LE: [[THELIST:%.*]] = alloca ptr
 727 // CHECK-LE: [[CURLIST:%.*]] = load ptr, ptr [[THELIST]]
 728
 729   // TooBigHVA is not actually an HVA, so gets passed indirectly. Only 8 bytes
 730   // of stack consumed.
 731 // CHECK-LE: [[NEXTLIST:%.*]] = getelementptr inbounds i8, ptr [[CURLIST]], i64 8
 732 // CHECK-LE: store ptr [[NEXTLIST]], ptr [[THELIST]]
 733
 734 // CHECK-LE: [[HVAPTR:%.*]] = load ptr, ptr [[CURLIST]]
 735   __builtin_va_list thelist;
 736   __builtin_va_start(thelist, n);
 737   struct TooBigHVA h = __builtin_va_arg(thelist, struct TooBigHVA);
 738   return h.d;
 739 }
 740
 741 typedef __attribute__((__ext_vector_type__(3))) float float32x3_t;
 742 typedef struct { float32x3_t arr[4]; } HFAv3;
 743
 744 float32x3_t test_hva_v3(int n, ...) {
 745 // CHECK-LE-LABEL: define{{.*}} <3 x float> @test_hva_v3(i32 noundef %n, ...)
 746 // CHECK-LE: [[THELIST:%.*]] = alloca ptr
 747 // CHECK-LE: [[CURLIST:%.*]] = load ptr, ptr [[THELIST]]
 748
 749   // HVA is not indirect, so it occupies its full 64 bytes on the stack, but it
 750   // must be properly aligned (to 16 bytes, per the ptrmask below).
 751
 752 // CHECK-LE: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[CURLIST]], i32 15
 753 // CHECK-LE: [[ALIGNED_LIST:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[GEP]], i64 -16)
 754 // CHECK-LE: [[NEXTLIST:%.*]] = getelementptr inbounds i8, ptr [[ALIGNED_LIST]], i64 64
 755 // CHECK-LE: store ptr [[NEXTLIST]], ptr [[THELIST]]
 756
 757   __builtin_va_list l;
 758   __builtin_va_start(l, n);
 759   HFAv3 r = __builtin_va_arg(l, HFAv3);
 760   return r.arr[2];
 761 }
 762
 763 float32x3_t test_hva_v3_call(HFAv3 *a) {
 764 // CHECK-LABEL: define{{.*}} <3 x float> @test_hva_v3_call(ptr noundef %a)
 765 // CHECK: call <3 x float> (i32, ...) @test_hva_v3(i32 noundef 1, [4 x <4 x float>] {{.*}})
 766   return test_hva_v3(1, *a);
 767 }