test/CodeGen/AArch64/arm64-abi_align.ll

   1 ; RUN: llc -fast-isel-sink-local-values < %s -mtriple=arm64-apple-darwin -mcpu=cyclone -enable-misched=false -frame-pointer=all | FileCheck %s
   2 ; RUN: llc -fast-isel-sink-local-values < %s -mtriple=arm64-apple-darwin -O0 -frame-pointer=all -fast-isel | FileCheck -check-prefix=FAST %s
   3
   4 ; rdar://12648441
   5 ; Generated from arm64-arguments.c with -O2.
   6 ; Test passing structs with sizes < 8, < 16 and > 16 bytes,
   7 ; both with and without an alignment of 16.
   8
   9 ; Structs with size < 8 bytes
  10 %struct.s38 = type { i32, i16 }
  11 ; With an alignment of 16, the size is padded to a multiple of 16 bytes (the trailing [10 x i8]).
  12 %struct.s39 = type { i32, i16, [10 x i8] }
  13 ; Structs with size < 16 bytes (s41 is the type of the 16-byte-aligned globals @g41 and @g41_2)
  14 %struct.s40 = type { i32, i16, i32, i16 }
  15 %struct.s41 = type { i32, i16, i32, i16 }
  16 ; Structs with size > 16 bytes (s43 adds a trailing [10 x i8] and is the type of the 16-byte-aligned globals)
  17 %struct.s42 = type { i32, i16, i32, i16, i32, i16 }
  18 %struct.s43 = type { i32, i16, i32, i16, i32, i16, [10 x i8] }
  19 ; Two globals per struct type: the callers below pass one as s1 and the other as s2.
  20 @g38 = common global %struct.s38 zeroinitializer, align 4
  21 @g38_2 = common global %struct.s38 zeroinitializer, align 4
  22 @g39 = common global %struct.s39 zeroinitializer, align 16
  23 @g39_2 = common global %struct.s39 zeroinitializer, align 16
  24 @g40 = common global %struct.s40 zeroinitializer, align 4
  25 @g40_2 = common global %struct.s40 zeroinitializer, align 4
  26 @g41 = common global %struct.s41 zeroinitializer, align 16
  27 @g41_2 = common global %struct.s41 zeroinitializer, align 16
  28 @g42 = common global %struct.s42 zeroinitializer, align 4
  29 @g42_2 = common global %struct.s42 zeroinitializer, align 4
  30 @g43 = common global %struct.s43 zeroinitializer, align 16
  31 @g43_2 = common global %struct.s43 zeroinitializer, align 16
  32
  33 ; Structs with size < 8 bytes: each one is coerced to an i64 and passed in x1 and x2.
  34 define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 {
  35 entry:
  36 ; CHECK-LABEL: f38
  37 ; CHECK: add w[[A:[0-9]+]], w1, w0
  38 ; CHECK: add {{w[0-9]+}}, w[[A]], w2
  39   %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce to i32 ; low 32 bits of s1
  40   %s1.sroa.1.4.extract.shift = lshr i64 %s1.coerce, 32 ; high 32 bits of s1
  41   %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce to i32 ; low 32 bits of s2
  42   %s2.sroa.1.4.extract.shift = lshr i64 %s2.coerce, 32 ; high 32 bits of s2
  43   %sext8 = shl nuw nsw i64 %s1.sroa.1.4.extract.shift, 16 ; shl 16 then ashr 16: sign-extend the low 16 bits
  44   %sext = trunc i64 %sext8 to i32
  45   %conv = ashr exact i32 %sext, 16
  46   %sext1011 = shl nuw nsw i64 %s2.sroa.1.4.extract.shift, 16 ; same sign-extension for s2
  47   %sext10 = trunc i64 %sext1011 to i32
  48   %conv6 = ashr exact i32 %sext10, 16
  49   %add = add i32 %s1.sroa.0.0.extract.trunc, %i
  50   %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
  51   %add4 = add i32 %add3, %conv
  52   %add7 = add i32 %add4, %conv6
  53   ret i32 %add7 ; sum of %i, both low words and both sign-extended 16-bit values
  54 }
  55
  56 define i32 @caller38() #1 { ; passes @g38 and @g38_2 to @f38, each as a single i64
  57 entry:
  58 ; CHECK-LABEL: caller38
  59 ; CHECK: ldr x1,
  60 ; CHECK: ldr x2,
  61   %0 = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4 ; s1, expected in x1
  62   %1 = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4 ; s2, expected in x2
  63   %call = tail call i32 @f38(i32 3, i64 %0, i64 %1) #5
  64   ret i32 %call
  65 }
  66
  67 declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
  68                 i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce) #0
  69
  70 ; Structs with size < 8 bytes, passed on the stack at [sp+8] and [sp+16]
  71 ; because nine i32 arguments precede them; %i9 itself is stored at [sp].
  72 define i32 @caller38_stack() #1 {
  73 entry:
  74 ; CHECK-LABEL: caller38_stack
  75 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
  76 ; CHECK: mov w[[C:[0-9]+]], #9
  77 ; CHECK: str w[[C]], [sp]
  78   %0 = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4 ; s1 as one i64
  79   %1 = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4 ; s2 as one i64
  80   %call = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
  81                                    i32 7, i32 8, i32 9, i64 %0, i64 %1) #5
  82   ret i32 %call
  83 }
  84
  85 ; Structs with size < 8 bytes and an alignment of 16:
  86 ; each one is passed as an i128 in a register pair, starting at x1 and x3.
  87 define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
  88 entry:
  89 ; CHECK-LABEL: f39
  90 ; CHECK: add w[[A:[0-9]+]], w1, w0
  91 ; CHECK: add {{w[0-9]+}}, w[[A]], w3
  92   %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32 ; low 32 bits of s1
  93   %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32 ; s1 without its low 32 bits
  94   %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32 ; low 32 bits of s2
  95   %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32 ; s2 without its low 32 bits
  96   %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16 ; shl 16 then ashr 16: sign-extend the low 16 bits
  97   %sext = trunc i128 %sext8 to i32
  98   %conv = ashr exact i32 %sext, 16
  99   %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16 ; same sign-extension for s2
 100   %sext10 = trunc i128 %sext1011 to i32
 101   %conv6 = ashr exact i32 %sext10, 16
 102   %add = add i32 %s1.sroa.0.0.extract.trunc, %i
 103   %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
 104   %add4 = add i32 %add3, %conv
 105   %add7 = add i32 %add4, %conv6
 106   ret i32 %add7 ; sum of %i, both low words and both sign-extended 16-bit values
 107 }
 108
 109 define i32 @caller39() #1 { ; passes @g39 and @g39_2 to @f39, each as a single i128
 110 entry:
 111 ; CHECK-LABEL: caller39
 112 ; CHECK: ldp x1, x2,
 113 ; CHECK: ldp x3, x4,
 114   %0 = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16 ; s1, expected in x1/x2
 115   %1 = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16 ; s2, expected in x3/x4
 116   %call = tail call i32 @f39(i32 3, i128 %0, i128 %1) #5
 117   ret i32 %call
 118 }
 119
 120 declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
 121                 i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0
 122
 123 ; Structs with size < 8 bytes and an alignment of 16,
 124 ; passed on the stack at [sp+16] and [sp+32]; %i9 is stored at [sp].
 125 define i32 @caller39_stack() #1 {
 126 entry:
 127 ; CHECK-LABEL: caller39_stack
 128 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
 129 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
 130 ; CHECK: mov w[[C:[0-9]+]], #9
 131 ; CHECK: str w[[C]], [sp]
 132   %0 = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16 ; s1 as one i128
 133   %1 = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16 ; s2 as one i128
 134   %call = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
 135                                    i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
 136   ret i32 %call
 137 }
 138
 139 ; Structs with size < 16 bytes:
 140 ; each one is passed as a [2 x i64] in a register pair, starting at x1 and x3.
 141 define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 {
 142 entry:
 143 ; CHECK-LABEL: f40
 144 ; CHECK: add w[[A:[0-9]+]], w1, w0
 145 ; CHECK: add {{w[0-9]+}}, w[[A]], w3
 146   %s1.coerce.fca.0.extract = extractvalue [2 x i64] %s1.coerce, 0 ; first i64 element of s1
 147   %s2.coerce.fca.0.extract = extractvalue [2 x i64] %s2.coerce, 0 ; first i64 element of s2
 148   %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce.fca.0.extract to i32 ; low 32 bits of s1
 149   %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce.fca.0.extract to i32 ; low 32 bits of s2
 150   %s1.sroa.0.4.extract.shift = lshr i64 %s1.coerce.fca.0.extract, 32 ; high 32 bits of the first element
 151   %sext8 = shl nuw nsw i64 %s1.sroa.0.4.extract.shift, 16 ; shl 16 then ashr 16: sign-extend the low 16 bits
 152   %sext = trunc i64 %sext8 to i32
 153   %conv = ashr exact i32 %sext, 16
 154   %s2.sroa.0.4.extract.shift = lshr i64 %s2.coerce.fca.0.extract, 32 ; high 32 bits of the first element
 155   %sext1011 = shl nuw nsw i64 %s2.sroa.0.4.extract.shift, 16 ; same sign-extension for s2
 156   %sext10 = trunc i64 %sext1011 to i32
 157   %conv6 = ashr exact i32 %sext10, 16
 158   %add = add i32 %s1.sroa.0.0.extract.trunc, %i
 159   %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
 160   %add4 = add i32 %add3, %conv
 161   %add7 = add i32 %add4, %conv6
 162   ret i32 %add7 ; only the first i64 element of each argument contributes to the result
 163 }
 164
 165 define i32 @caller40() #1 { ; passes @g40 and @g40_2 to @f40, each as a [2 x i64]
 166 entry:
 167 ; CHECK-LABEL: caller40
 168 ; CHECK: ldp x1, x2,
 169 ; CHECK: ldp x3, x4,
 170   %0 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4 ; s1, expected in x1/x2
 171   %1 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4 ; s2, expected in x3/x4
 172   %call = tail call i32 @f40(i32 3, [2 x i64] %0, [2 x i64] %1) #5
 173   ret i32 %call
 174 }
 175
 176 declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
 177                 i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0
 178
 179 ; Structs with size < 16 bytes and no extra alignment,
 180 ; passed on the stack at [sp+8] and [sp+24]; %i9 is stored at [sp].
 181 define i32 @caller40_stack() #1 {
 182 entry:
 183 ; CHECK-LABEL: caller40_stack
 184 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24]
 185 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
 186 ; CHECK: mov w[[C:[0-9]+]], #9
 187 ; CHECK: str w[[C]], [sp]
 188   %0 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4 ; s1 as [2 x i64]
 189   %1 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4 ; s2 as [2 x i64]
 190   %call = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
 191                          i32 7, i32 8, i32 9, [2 x i64] %0, [2 x i64] %1) #5
 192   ret i32 %call
 193 }
 194
 195 ; Structs with size < 16 bytes and an alignment of 16:
 196 ; each one is passed as an i128 in a register pair, starting at x1 and x3.
 197 define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
 198 entry:
 199 ; CHECK-LABEL: f41
 200 ; CHECK: add w[[A:[0-9]+]], w1, w0
 201 ; CHECK: add {{w[0-9]+}}, w[[A]], w3
 202   %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32 ; low 32 bits of s1
 203   %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32 ; s1 without its low 32 bits
 204   %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32 ; low 32 bits of s2
 205   %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32 ; s2 without its low 32 bits
 206   %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16 ; shl 16 then ashr 16: sign-extend the low 16 bits
 207   %sext = trunc i128 %sext8 to i32
 208   %conv = ashr exact i32 %sext, 16
 209   %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16 ; same sign-extension for s2
 210   %sext10 = trunc i128 %sext1011 to i32
 211   %conv6 = ashr exact i32 %sext10, 16
 212   %add = add i32 %s1.sroa.0.0.extract.trunc, %i
 213   %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
 214   %add4 = add i32 %add3, %conv
 215   %add7 = add i32 %add4, %conv6
 216   ret i32 %add7 ; sum of %i, both low words and both sign-extended 16-bit values
 217 }
 218
 219 define i32 @caller41() #1 { ; passes @g41 and @g41_2 to @f41, each as a single i128
 220 entry:
 221 ; CHECK-LABEL: caller41
 222 ; CHECK: ldp x1, x2,
 223 ; CHECK: ldp x3, x4,
 224   %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16 ; s1, expected in x1/x2
 225   %1 = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16 ; s2, expected in x3/x4
 226   %call = tail call i32 @f41(i32 3, i128 %0, i128 %1) #5
 227   ret i32 %call
 228 }
 229
 230 declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
 231                 i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0
 232
 233 ; Structs with size < 16 bytes and an alignment of 16,
 234 ; passed on the stack at [sp+16] and [sp+32]; %i9 is stored at [sp].
 235 define i32 @caller41_stack() #1 {
 236 entry:
 237 ; CHECK-LABEL: caller41_stack
 238 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
 239 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
 240 ; CHECK: mov w[[C:[0-9]+]], #9
 241 ; CHECK: str w[[C]], [sp]
 242   %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16 ; s1 as one i128
 243   %1 = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16 ; s2 as one i128
 244   %call = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
 245                             i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
 246   ret i32 %call
 247 }
 248
 249 ; Structs with size of 22 bytes are passed indirectly: x1 and x2 hold their addresses.
 250 define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42* nocapture %s2) #2 {
 251 entry:
 252 ; CHECK-LABEL: f42
 253 ; CHECK: ldr w[[A:[0-9]+]], [x1]
 254 ; CHECK: ldr w[[B:[0-9]+]], [x2]
 255 ; CHECK: add w[[C:[0-9]+]], w[[A]], w0
 256 ; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
 257 ; FAST-LABEL: f42
 258 ; FAST: ldr w[[A:[0-9]+]], [x1]
 259 ; FAST: ldr w[[B:[0-9]+]], [x2]
 260 ; FAST: add w[[C:[0-9]+]], w[[A]], w0
 261 ; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
 262   %i1 = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 0 ; &s1 field 0 (i32)
 263   %0 = load i32, i32* %i1, align 4, !tbaa !0
 264   %i2 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 0 ; &s2 field 0 (i32)
 265   %1 = load i32, i32* %i2, align 4, !tbaa !0
 266   %s = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 1 ; &s1 field 1 (i16)
 267   %2 = load i16, i16* %s, align 2, !tbaa !3
 268   %conv = sext i16 %2 to i32
 269   %s5 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 1 ; &s2 field 1 (i16)
 270   %3 = load i16, i16* %s5, align 2, !tbaa !3
 271   %conv6 = sext i16 %3 to i32
 272   %add = add i32 %0, %i
 273   %add3 = add i32 %add, %1
 274   %add4 = add i32 %add3, %conv
 275   %add7 = add i32 %add4, %conv6
 276   ret i32 %add7 ; sum of %i, both i32 fields and both sign-extended i16 fields
 277 }
 278
 279 ; Both structs are copied into stack temporaries (24 bytes each, see the memcpy calls); the address of the s1 copy goes in x1 and that of the s2 copy in x2.
 280 define i32 @caller42() #3 {
 281 entry:
 282 ; CHECK-LABEL: caller42
 283 ; CHECK-DAG: str {{x[0-9]+}}, [sp, #48]
 284 ; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
 285 ; CHECK-DAG: str {{x[0-9]+}}, [sp, #16]
 286 ; CHECK-DAG: str {{q[0-9]+}}, [sp]
 287 ; CHECK: add x1, sp, #32
 288 ; CHECK: mov x2, sp
 289 ; Space for s1 is allocated at sp+32
 290 ; Space for s2 is allocated at sp
 291
 292 ; FAST-LABEL: caller42
 293 ; FAST: sub sp, sp, #96
 294 ; Space for s1 is allocated at fp-24 = sp+56
 295 ; FAST: sub x[[A:[0-9]+]], x29, #24
 296 ; Call memcpy with size = 24 (0x18)
 297 ; FAST: mov {{x[0-9]+}}, #24
 298 ; Space for s2 is allocated at sp+32
 299 ; FAST: add x[[A:[0-9]+]], sp, #32
 300 ; FAST: bl _memcpy
 301   %tmp = alloca %struct.s42, align 4 ; temporary copy of s1
 302   %tmp1 = alloca %struct.s42, align 4 ; temporary copy of s2
 303   %0 = bitcast %struct.s42* %tmp to i8*
 304   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4
 305   %1 = bitcast %struct.s42* %tmp1 to i8*
 306   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4
 307   %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5
 308   ret i32 %call
 309 }
 310
 311 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #4
 312
 313 declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
 314                        i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1,
 315                        %struct.s42* nocapture %s2) #2
 316
 317 define i32 @caller42_stack() #3 { ; like caller42, but nine i32 arguments precede the two struct addresses
 318 entry:
 319 ; CHECK-LABEL: caller42_stack
 320 ; CHECK: sub sp, sp, #112
 321 ; CHECK: add x29, sp, #96
 322 ; CHECK-DAG: stur {{x[0-9]+}}, [x29, #-16]
 323 ; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32]
 324 ; CHECK-DAG: str {{x[0-9]+}}, [sp, #48]
 325 ; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
 326 ; Space for s1 is allocated at x29-32 = sp+64
 327 ; Space for s2 is allocated at sp+32
 328 ; CHECK: add x[[B:[0-9]+]], sp, #32
 329 ; CHECK: str x[[B]], [sp, #16]
 330 ; CHECK: sub x[[A:[0-9]+]], x29, #32
 331 ; Address of s1 is passed on stack at sp+8
 332 ; CHECK: str x[[A]], [sp, #8]
 333 ; CHECK: mov w[[C:[0-9]+]], #9
 334 ; CHECK: str w[[C]], [sp]
 335
 336 ; FAST-LABEL: caller42_stack
 337 ; Space for s1 is allocated at fp-24
 338 ; FAST: sub x[[A:[0-9]+]], x29, #24
 339 ; Call memcpy with size = 24 (0x18)
 340 ; FAST: mov {{x[0-9]+}}, #24
 341 ; FAST: bl _memcpy
 342 ; Space for s2 is allocated at fp-48
 343 ; FAST: sub x[[B:[0-9]+]], x29, #48
 344 ; Call memcpy again
 345 ; FAST: bl _memcpy
 346 ; Address of s1 is passed on stack at sp+8
 347 ; FAST: str {{w[0-9]+}}, [sp]
 348 ; FAST: str {{x[0-9]+}}, [sp, #8]
 349 ; FAST: str {{x[0-9]+}}, [sp, #16]
 350   %tmp = alloca %struct.s42, align 4 ; temporary copy of s1
 351   %tmp1 = alloca %struct.s42, align 4 ; temporary copy of s2
 352   %0 = bitcast %struct.s42* %tmp to i8*
 353   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4
 354   %1 = bitcast %struct.s42* %tmp1 to i8*
 355   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4
 356   %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
 357                        i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5
 358   ret i32 %call
 359 }
 360
 361 ; Structs with size of 22 bytes and an alignment of 16 (the IR type carries a trailing [10 x i8]),
 362 ; passed indirectly: x1 and x2 hold their addresses.
 363 define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43* nocapture %s2) #2 {
 364 entry:
 365 ; CHECK-LABEL: f43
 366 ; CHECK: ldr w[[A:[0-9]+]], [x1]
 367 ; CHECK: ldr w[[B:[0-9]+]], [x2]
 368 ; CHECK: add w[[C:[0-9]+]], w[[A]], w0
 369 ; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
 370 ; FAST-LABEL: f43
 371 ; FAST: ldr w[[A:[0-9]+]], [x1]
 372 ; FAST: ldr w[[B:[0-9]+]], [x2]
 373 ; FAST: add w[[C:[0-9]+]], w[[A]], w0
 374 ; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
 375   %i1 = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 0 ; &s1 field 0 (i32)
 376   %0 = load i32, i32* %i1, align 4, !tbaa !0
 377   %i2 = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 0 ; &s2 field 0 (i32)
 378   %1 = load i32, i32* %i2, align 4, !tbaa !0
 379   %s = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 1 ; &s1 field 1 (i16)
 380   %2 = load i16, i16* %s, align 2, !tbaa !3
 381   %conv = sext i16 %2 to i32
 382   %s5 = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 1 ; &s2 field 1 (i16)
 383   %3 = load i16, i16* %s5, align 2, !tbaa !3
 384   %conv6 = sext i16 %3 to i32
 385   %add = add i32 %0, %i
 386   %add3 = add i32 %add, %1
 387   %add4 = add i32 %add3, %conv
 388   %add7 = add i32 %add4, %conv6
 389   ret i32 %add7 ; sum of %i, both i32 fields and both sign-extended i16 fields
 390 }
 391
 392 define i32 @caller43() #3 { ; copies @g43 and @g43_2 (32 bytes each) into 16-byte-aligned temporaries and passes their addresses to @f43
 393 entry:
 394 ; CHECK-LABEL: caller43
 395 ; CHECK-DAG: str {{q[0-9]+}}, [sp, #48]
 396 ; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
 397 ; CHECK-DAG: str {{q[0-9]+}}, [sp, #16]
 398 ; CHECK-DAG: str {{q[0-9]+}}, [sp]
 399 ; CHECK: add x1, sp, #32
 400 ; CHECK: mov x2, sp
 401 ; Space for s1 is allocated at sp+32
 402 ; Space for s2 is allocated at sp
 403
 404 ; FAST-LABEL: caller43
 405 ; FAST: add x29, sp, #64
 406 ; Space for s1 is allocated at sp+32
 407 ; Space for s2 is allocated at sp
 408 ; FAST: str {{x[0-9]+}}, [sp, #32]
 409 ; FAST: str {{x[0-9]+}}, [sp, #40]
 410 ; FAST: str {{x[0-9]+}}, [sp, #48]
 411 ; FAST: str {{x[0-9]+}}, [sp, #56]
 412 ; FAST: str {{x[0-9]+}}, [sp]
 413 ; FAST: str {{x[0-9]+}}, [sp, #8]
 414 ; FAST: str {{x[0-9]+}}, [sp, #16]
 415 ; FAST: str {{x[0-9]+}}, [sp, #24]
 416 ; FAST: add x1, sp, #32
 417 ; FAST: mov x2, sp
 418   %tmp = alloca %struct.s43, align 16 ; temporary copy of s1
 419   %tmp1 = alloca %struct.s43, align 16 ; temporary copy of s2
 420   %0 = bitcast %struct.s43* %tmp to i8*
 421   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, i1 false), !tbaa.struct !4
 422   %1 = bitcast %struct.s43* %tmp1 to i8*
 423   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4
 424   %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5
 425   ret i32 %call
 426 }
 427
 428 declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
 429                        i32 %i7, i32 %i8, i32 %i9, %struct.s43* nocapture %s1,
 430                        %struct.s43* nocapture %s2) #2
 431
 432 define i32 @caller43_stack() #3 { ; like caller43, but nine i32 arguments precede the two struct addresses
 433 entry:
 434 ; CHECK-LABEL: caller43_stack
 435 ; CHECK: sub sp, sp, #112
 436 ; CHECK: add x29, sp, #96
 437 ; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-16]
 438 ; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32]
 439 ; CHECK-DAG: str {{q[0-9]+}}, [sp, #48]
 440 ; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
 441 ; Space for s1 is allocated at x29-32 = sp+64
 442 ; Space for s2 is allocated at sp+32
 443 ; CHECK: add x[[B:[0-9]+]], sp, #32
 444 ; CHECK: str x[[B]], [sp, #16]
 445 ; CHECK: sub x[[A:[0-9]+]], x29, #32
 446 ; Address of s1 is passed on stack at sp+8
 447 ; CHECK: str x[[A]], [sp, #8]
 448 ; CHECK: mov w[[C:[0-9]+]], #9
 449 ; CHECK: str w[[C]], [sp]
 450
 451 ; FAST-LABEL: caller43_stack
 452 ; FAST: sub sp, sp, #112
 453 ; Space for s1 is allocated at fp-32 = sp+64
 454 ; Space for s2 is allocated at sp+32
 455 ; FAST: stur {{x[0-9]+}}, [x29, #-32]
 456 ; FAST: stur {{x[0-9]+}}, [x29, #-24]
 457 ; FAST: stur {{x[0-9]+}}, [x29, #-16]
 458 ; FAST: stur {{x[0-9]+}}, [x29, #-8]
 459 ; FAST: str {{x[0-9]+}}, [sp, #32]
 460 ; FAST: str {{x[0-9]+}}, [sp, #40]
 461 ; FAST: str {{x[0-9]+}}, [sp, #48]
 462 ; FAST: str {{x[0-9]+}}, [sp, #56]
 463 ; FAST: str {{w[0-9]+}}, [sp]
 464 ; Address of s1 is passed on stack at sp+8
 465 ; FAST: sub x[[A:[0-9]+]], x29, #32
 466 ; FAST: str x[[A]], [sp, #8]
 467 ; FAST: add x[[B:[0-9]+]], sp, #32
 468 ; FAST: str x[[B]], [sp, #16]
 469   %tmp = alloca %struct.s43, align 16 ; temporary copy of s1
 470   %tmp1 = alloca %struct.s43, align 16 ; temporary copy of s2
 471   %0 = bitcast %struct.s43* %tmp to i8*
 472   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, i1 false), !tbaa.struct !4
 473   %1 = bitcast %struct.s43* %tmp1 to i8*
 474   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4
 475   %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
 476                        i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5
 477   ret i32 %call
 478 }
 479
 480 ; rdar://13668927
 481 ; Check that an i128 argument is not split between a register and the stack.
 482 declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
 483                                i32 %i6, i32 %i7, i128 %s1, i32 %i8)
 484
 485 define i32 @i128_split() {
 486 entry:
 487 ; CHECK-LABEL: i128_split
 488 ; "i128 %0" should be on stack at [sp].
 489 ; "i32 8" should be on stack at [sp, #16].
 490 ; CHECK: str {{w[0-9]+}}, [sp, #16]
 491 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
 492 ; FAST-LABEL: i128_split
 493 ; FAST: sub sp, sp
 494 ; FAST: mov x[[ADDR:[0-9]+]], sp
 495 ; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
 496 ; Load/Store opt is disabled with -O0, so the i128 is written with two separate 64-bit stores.
 497 ; FAST: str {{x[0-9]+}}, [x[[ADDR]], #8]
 498 ; FAST: str {{x[0-9]+}}, [x[[ADDR]]]
 499   %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16 ; reuse @g41 as the i128 value
 500   %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
 501                                            i32 6, i32 7, i128 %0, i32 8) #5
 502   ret i32 %call
 503 }
 504
 505 declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
 506                                i32 %i6, i32 %i7, i64 %s1, i32 %i8)
 507
 508 define i32 @i64_split() { ; i64 counterpart of i128_split: the i64 still goes in a register, only the trailing i32 goes on the stack
 509 entry:
 510 ; CHECK-LABEL: i64_split
 511 ; "i64 %0" should be in register x7.
 512 ; "i32 8" should be on stack at [sp].
 513 ; CHECK: ldr x7, [{{x[0-9]+}}]
 514 ; CHECK: str {{w[0-9]+}}, [sp]
 515 ; FAST-LABEL: i64_split
 516 ; FAST: ldr x7, [{{x[0-9]+}}]
 517 ; FAST: mov x[[R0:[0-9]+]], sp
 518 ; FAST: mov w[[R1:[0-9]+]], #8
 519 ; FAST: str w[[R1]], {{\[}}x[[R0]]{{\]}}
 520   %0 = load i64, i64* bitcast (%struct.s41* @g41 to i64*), align 16 ; first 8 bytes of @g41 as the i64 value
 521   %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5,
 522                                     i32 6, i32 7, i64 %0, i32 8) #5
 523   ret i32 %call
 524 }
 525
 526 attributes #0 = { noinline nounwind readnone "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } ; f38-f41 and their _stack declarations
 527 attributes #1 = { nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } ; caller38-caller41 and their _stack variants
 528 attributes #2 = { noinline nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } ; f42, f43 and their _stack declarations
 529 attributes #3 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } ; caller42, caller43 and their _stack variants
 530 attributes #4 = { nounwind } ; the llvm.memcpy declaration
 531 attributes #5 = { nobuiltin } ; attached to the call sites in the callers
 532
 533 !0 = !{!"int", !1}
 534 !1 = !{!"omnipotent char", !2}
 535 !2 = !{!"Simple C/C++ TBAA"}
 536 !3 = !{!"short", !1}
 537 !4 = !{i64 0, i64 4, !0, i64 4, i64 2, !3, i64 8, i64 4, !0, i64 12, i64 2, !3, i64 16, i64 4, !0, i64 20, i64 2, !3} ; tbaa.struct: (offset, size, tag) triples for three int/short pairs