1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -aarch64-load-store-renaming=true < %s -mtriple=arm64-apple-ios7.0.0 -mcpu=cyclone -enable-misched=false | FileCheck %s
5 ; Here we have 9 fixed integer arguments; the 9th argument is on the stack, and
6 ; the varargs start right after it at 8-byte alignment.
; fn9: callee side of the fixed-plus-variadic argument test.
; Takes a pointer, eight fixed i32 arguments (%a2..%a9) and a variadic tail.
; The body spills %a2..%a9 to local slots, writes %a9 through %a1, and then
; reads three i32 values with va_arg into %a10, %a11 and %a12.
; In the expected assembly %a9 is loaded from [sp, #64], i.e. the first
; incoming stack slot once the 64-byte frame has been allocated, and the
; va_list starts at sp + 72 and is advanced by 8 bytes per va_arg.
7 define void @fn9(ptr %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, ...) nounwind noinline ssp {
10 ; CHECK-NEXT: sub sp, sp, #64
11 ; CHECK-NEXT: ldr w8, [sp, #64]
12 ; CHECK-NEXT: stp w2, w1, [sp, #52]
13 ; CHECK-NEXT: stp w4, w3, [sp, #44]
14 ; CHECK-NEXT: stp w6, w5, [sp, #36]
15 ; CHECK-NEXT: str w7, [sp, #32]
16 ; CHECK-NEXT: str w8, [x0]
17 ; CHECK-NEXT: add x8, sp, #72
18 ; CHECK-NEXT: add x8, x8, #8
19 ; CHECK-NEXT: ldr w9, [sp, #72]
20 ; CHECK-NEXT: str w9, [sp, #20]
21 ; CHECK-NEXT: ldr w9, [x8], #8
22 ; CHECK-NEXT: str w9, [sp, #16]
23 ; CHECK-NEXT: ldr w9, [x8], #8
24 ; CHECK-NEXT: str x8, [sp, #24]
25 ; CHECK-NEXT: str w9, [sp, #12]
26 ; CHECK-NEXT: add sp, sp, #64
; Local slots: %2..%9 receive the fixed arguments, %args holds the va_list
; pointer, and %a10..%a12 receive the three variadic values.
28 %1 = alloca i32, align 4
29 %2 = alloca i32, align 4
30 %3 = alloca i32, align 4
31 %4 = alloca i32, align 4
32 %5 = alloca i32, align 4
33 %6 = alloca i32, align 4
34 %7 = alloca i32, align 4
35 %8 = alloca i32, align 4
36 %9 = alloca i32, align 4
37 %args = alloca ptr, align 8
38 %a10 = alloca i32, align 4
39 %a11 = alloca i32, align 4
40 %a12 = alloca i32, align 4
41 store i32 %a2, ptr %2, align 4
42 store i32 %a3, ptr %3, align 4
43 store i32 %a4, ptr %4, align 4
44 store i32 %a5, ptr %5, align 4
45 store i32 %a6, ptr %6, align 4
46 store i32 %a7, ptr %7, align 4
47 store i32 %a8, ptr %8, align 4
48 store i32 %a9, ptr %9, align 4
; Also write the stack-passed 9th argument through the caller-supplied pointer.
49 store i32 %a9, ptr %a1
50 call void @llvm.va_start(ptr %args)
51 %10 = va_arg ptr %args, i32
52 store i32 %10, ptr %a10, align 4
53 %11 = va_arg ptr %args, i32
54 store i32 %11, ptr %a11, align 4
55 %12 = va_arg ptr %args, i32
56 store i32 %12, ptr %a12, align 4
60 declare void @llvm.va_start(ptr) nounwind
; main: caller side for @fn9.
; Stores the constants 1..12 into locals, reloads them, and calls @fn9 with
; the address %a1, eight fixed i32 values (2..9) and three variadic i32
; values (10, 11, 12).
; In the expected assembly the 9th fixed argument (9) is written with a 32-bit
; store at [sp], while the variadic values 10, 11 and 12 are written as 64-bit
; registers at [sp, #8], [sp, #16] and [sp, #24], one 8-byte slot each.
62 define i32 @main() nounwind ssp {
65 ; CHECK-NEXT: sub sp, sp, #96
66 ; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill
67 ; CHECK-NEXT: mov w9, #1
68 ; CHECK-NEXT: mov w8, #2
69 ; CHECK-NEXT: stp w8, w9, [sp, #72]
70 ; CHECK-NEXT: mov w9, #3
71 ; CHECK-NEXT: mov w8, #4
72 ; CHECK-NEXT: stp w8, w9, [sp, #64]
73 ; CHECK-NEXT: mov w9, #5
74 ; CHECK-NEXT: mov w8, #6
75 ; CHECK-NEXT: stp w8, w9, [sp, #56]
76 ; CHECK-NEXT: mov w9, #7
77 ; CHECK-NEXT: mov w8, #8
78 ; CHECK-NEXT: stp w8, w9, [sp, #48]
79 ; CHECK-NEXT: mov w8, #9
80 ; CHECK-NEXT: mov w9, #10
81 ; CHECK-NEXT: stp w9, w8, [sp, #40]
82 ; CHECK-NEXT: mov w10, #11
83 ; CHECK-NEXT: mov w11, #12
84 ; CHECK-NEXT: stp w11, w10, [sp, #32]
85 ; CHECK-NEXT: stp x10, x11, [sp, #16]
86 ; CHECK-NEXT: str x9, [sp, #8]
87 ; CHECK-NEXT: str w8, [sp]
88 ; CHECK-NEXT: add x0, sp, #76
89 ; CHECK-NEXT: mov w1, #2
90 ; CHECK-NEXT: mov w2, #3
91 ; CHECK-NEXT: mov w3, #4
92 ; CHECK-NEXT: mov w4, #5
93 ; CHECK-NEXT: mov w5, #6
94 ; CHECK-NEXT: mov w6, #7
95 ; CHECK-NEXT: mov w7, #8
97 ; CHECK-NEXT: mov w0, #0
98 ; CHECK-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
99 ; CHECK-NEXT: add sp, sp, #96
; One local slot per argument value; each is initialised with its own index.
101 %a1 = alloca i32, align 4
102 %a2 = alloca i32, align 4
103 %a3 = alloca i32, align 4
104 %a4 = alloca i32, align 4
105 %a5 = alloca i32, align 4
106 %a6 = alloca i32, align 4
107 %a7 = alloca i32, align 4
108 %a8 = alloca i32, align 4
109 %a9 = alloca i32, align 4
110 %a10 = alloca i32, align 4
111 %a11 = alloca i32, align 4
112 %a12 = alloca i32, align 4
113 store i32 1, ptr %a1, align 4
114 store i32 2, ptr %a2, align 4
115 store i32 3, ptr %a3, align 4
116 store i32 4, ptr %a4, align 4
117 store i32 5, ptr %a5, align 4
118 store i32 6, ptr %a6, align 4
119 store i32 7, ptr %a7, align 4
120 store i32 8, ptr %a8, align 4
121 store i32 9, ptr %a9, align 4
122 store i32 10, ptr %a10, align 4
123 store i32 11, ptr %a11, align 4
124 store i32 12, ptr %a12, align 4
; Note: %1 is loaded here but is not an operand of the call below, which
; passes the address %a1 as the first argument instead.
125 %1 = load i32, ptr %a1, align 4
126 %2 = load i32, ptr %a2, align 4
127 %3 = load i32, ptr %a3, align 4
128 %4 = load i32, ptr %a4, align 4
129 %5 = load i32, ptr %a5, align 4
130 %6 = load i32, ptr %a6, align 4
131 %7 = load i32, ptr %a7, align 4
132 %8 = load i32, ptr %a8, align 4
133 %9 = load i32, ptr %a9, align 4
134 %10 = load i32, ptr %a10, align 4
135 %11 = load i32, ptr %a11, align 4
136 %12 = load i32, ptr %a12, align 4
; %2..%9 are the fixed i32 arguments; %10..%12 form the variadic tail.
137 call void (ptr, i32, i32, i32, i32, i32, i32, i32, i32, ...) @fn9(ptr %a1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12)
142 @.str = private unnamed_addr constant [4 x i8] c"fmt\00", align 1
; foo: variadic callee that reads an i32 and then a <4 x i32> with va_arg.
; The expected assembly loads the i32 from the first variadic slot at
; [sp, #48], then computes (sp + 48 + 23) & ~15: it steps over the 8-byte i32
; slot and rounds up to a 16-byte boundary before loading the vector with
; `ldr q0`. That aligned address plus 16 is stored back as the updated
; va_list pointer, next to the spilled %fmt.
143 define void @foo(ptr %fmt, ...) nounwind {
145 ; CHECK: ; %bb.0: ; %entry
146 ; CHECK-NEXT: sub sp, sp, #48
147 ; CHECK-NEXT: ldr w8, [sp, #48]
148 ; CHECK-NEXT: str w8, [sp, #28]
149 ; CHECK-NEXT: add x8, sp, #48
150 ; CHECK-NEXT: add x8, x8, #23
151 ; CHECK-NEXT: and x8, x8, #0xfffffffffffffff0
152 ; CHECK-NEXT: add x9, x8, #16
153 ; CHECK-NEXT: stp x9, x0, [sp, #32]
154 ; CHECK-NEXT: ldr q0, [x8]
155 ; CHECK-NEXT: str q0, [sp], #48
; %vc receives the variadic i32, %vv the variadic vector.
158 %fmt.addr = alloca ptr, align 8
159 %args = alloca ptr, align 8
160 %vc = alloca i32, align 4
161 %vv = alloca <4 x i32>, align 16
162 store ptr %fmt, ptr %fmt.addr, align 8
163 call void @llvm.va_start(ptr %args)
164 %0 = va_arg ptr %args, i32
165 store i32 %0, ptr %vc, align 4
166 %1 = va_arg ptr %args, <4 x i32>
167 store <4 x i32> %1, ptr %vv, align 16
; bar: caller side for @foo.
; Spills %x and %y to locals, reloads them, and passes them as the variadic
; arguments that follow the format string @.str.
; In the expected assembly the i32 is stored (as x0) at [sp] and the vector at
; [sp, #16], so the vector argument sits on a 16-byte boundary. The paired
; store `stp q0, q0, [sp, #16]` also writes q0 at [sp, #32]; presumably that
; is the %y.addr local, but the frame layout is not spelled out here.
171 define void @bar(i32 %x, <4 x i32> %y) nounwind {
173 ; CHECK: ; %bb.0: ; %entry
174 ; CHECK-NEXT: sub sp, sp, #80
175 ; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill
176 ; CHECK-NEXT: ; kill: def $w0 killed $w0 def $x0
177 ; CHECK-NEXT: str w0, [sp, #60]
178 ; CHECK-NEXT: stp q0, q0, [sp, #16]
179 ; CHECK-NEXT: str x0, [sp]
181 ; CHECK-NEXT: adrp x0, l_.str@PAGE
183 ; CHECK-NEXT: add x0, x0, l_.str@PAGEOFF
184 ; CHECK-NEXT: bl _foo
185 ; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
186 ; CHECK-NEXT: add sp, sp, #80
188 ; CHECK-NEXT: .loh AdrpAdd Lloh0, Lloh1
190 %x.addr = alloca i32, align 4
191 %y.addr = alloca <4 x i32>, align 16
192 store i32 %x, ptr %x.addr, align 4
193 store <4 x i32> %y, ptr %y.addr, align 16
194 %0 = load i32, ptr %x.addr, align 4
195 %1 = load <4 x i32>, ptr %y.addr, align 16
; Variadic call: @.str is the only fixed argument; %0 and %1 are varargs.
196 call void (ptr, ...) @foo(ptr @.str, i32 %0, <4 x i32> %1)
201 ; When passing 16-byte aligned small structs as varargs, make sure the caller
202 ; side places them at a 16-byte aligned offset on the stack.
203 %struct.s41 = type { i32, i16, i32, i16 }
; foo2: variadic callee that reads an i32 with va_arg and then a 16-byte
; %struct.s41 by advancing the va_list by hand instead of using va_arg.
; The manual sequence loads the current va_list pointer, adds 15, converts it
; to an integer, turns %3 back into a pointer (%ap.align), stores
; %ap.align + 16 as the new va_list pointer, and copies 16 bytes from
; %ap.align into %vs.
; NOTE(review): the definition of %3 is not visible in this excerpt; it is
; presumably %2 masked down to a multiple of 16, which would match the
; `and x8, x8, #0xfffffffffffffff0` in the expected assembly. Confirm.
204 define void @foo2(ptr %fmt, ...) nounwind {
206 ; CHECK: ; %bb.0: ; %entry
207 ; CHECK-NEXT: sub sp, sp, #48
208 ; CHECK-NEXT: ldr w8, [sp, #48]
209 ; CHECK-NEXT: str w8, [sp, #28]
210 ; CHECK-NEXT: add x8, sp, #48
211 ; CHECK-NEXT: add x8, x8, #23
212 ; CHECK-NEXT: and x8, x8, #0xfffffffffffffff0
213 ; CHECK-NEXT: add x9, x8, #16
214 ; CHECK-NEXT: stp x9, x0, [sp, #32]
215 ; CHECK-NEXT: ldr q0, [x8]
216 ; CHECK-NEXT: str q0, [sp], #48
; %vc receives the variadic i32, %vs the variadic struct.
219 %fmt.addr = alloca ptr, align 8
220 %args = alloca ptr, align 8
221 %vc = alloca i32, align 4
222 %vs = alloca %struct.s41, align 16
223 store ptr %fmt, ptr %fmt.addr, align 8
224 call void @llvm.va_start(ptr %args)
225 %0 = va_arg ptr %args, i32
226 store i32 %0, ptr %vc, align 4
; Hand-written va_list advance for the 16-byte struct starts here.
227 %ap.cur = load ptr, ptr %args
228 %1 = getelementptr i8, ptr %ap.cur, i32 15
229 %2 = ptrtoint ptr %1 to i64
231 %ap.align = inttoptr i64 %3 to ptr
232 %ap.next = getelementptr i8, ptr %ap.align, i32 16
233 store ptr %ap.next, ptr %args
234 call void @llvm.memcpy.p0.p0.i64(ptr align 16 %vs, ptr align 16 %ap.align, i64 16, i1 false)
237 declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
; bar2: caller side for @foo2.
; The struct arrives coerced to an i128 (%s41.coerce). It is stored into the
; %s41 local, reloaded, and passed as a variadic i128 after the variadic i32.
; In the expected assembly the i32 is stored (as x0) at [sp] and the i128
; (x1, x2) at [sp, #16], so the 16-byte value sits on a 16-byte boundary.
; The second `stp x1, x2` at [sp, #32] is presumably the %s41 local; the
; frame layout is not spelled out here.
239 define void @bar2(i32 %x, i128 %s41.coerce) nounwind {
241 ; CHECK: ; %bb.0: ; %entry
242 ; CHECK-NEXT: sub sp, sp, #80
243 ; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill
244 ; CHECK-NEXT: ; kill: def $w0 killed $w0 def $x0
245 ; CHECK-NEXT: str w0, [sp, #60]
246 ; CHECK-NEXT: stp x1, x2, [sp, #32]
247 ; CHECK-NEXT: stp x1, x2, [sp, #16]
248 ; CHECK-NEXT: str x0, [sp]
250 ; CHECK-NEXT: adrp x0, l_.str@PAGE
252 ; CHECK-NEXT: add x0, x0, l_.str@PAGEOFF
253 ; CHECK-NEXT: bl _foo2
254 ; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
255 ; CHECK-NEXT: add sp, sp, #80
257 ; CHECK-NEXT: .loh AdrpAdd Lloh2, Lloh3
259 %x.addr = alloca i32, align 4
260 %s41 = alloca %struct.s41, align 16
261 store i32 %x, ptr %x.addr, align 4
262 store i128 %s41.coerce, ptr %s41, align 1
263 %0 = load i32, ptr %x.addr, align 4
264 %1 = load i128, ptr %s41, align 1
; Variadic call: @.str is the only fixed argument; %0 and %1 are varargs.
265 call void (ptr, ...) @foo2(ptr @.str, i32 %0, i128 %1)