1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc %s -stop-after=irtranslator -verify-machineinstrs -mtriple aarch64-apple-darwin -global-isel -o - 2>&1 | FileCheck %s --check-prefix=DARWIN
3 ; RUN: llc %s -stop-after=irtranslator -verify-machineinstrs -mtriple aarch64-windows -global-isel -o - 2>&1 | FileCheck %s --check-prefix=WINDOWS
5 declare void @simple_fn()
6 define void @tail_call() {
7 ; DARWIN-LABEL: name: tail_call
8 ; DARWIN: bb.1 (%ir-block.0):
9 ; DARWIN-NEXT: TCRETURNdi @simple_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp
10 ; WINDOWS-LABEL: name: tail_call
11 ; WINDOWS: bb.1 (%ir-block.0):
12 ; WINDOWS-NEXT: TCRETURNdi @simple_fn, 0, csr_aarch64_aapcs, implicit $sp
13 tail call void @simple_fn()
17 ; We should get a TCRETURNri here.
18 ; FIXME: We don't need the COPY.
19 define void @indirect_tail_call(ptr %func) {
20 ; DARWIN-LABEL: name: indirect_tail_call
21 ; DARWIN: bb.1 (%ir-block.0):
22 ; DARWIN-NEXT: liveins: $x0
24 ; DARWIN-NEXT: [[COPY:%[0-9]+]]:tcgpr64(p0) = COPY $x0
25 ; DARWIN-NEXT: TCRETURNri [[COPY]](p0), 0, csr_darwin_aarch64_aapcs, implicit $sp
26 ; WINDOWS-LABEL: name: indirect_tail_call
27 ; WINDOWS: bb.1 (%ir-block.0):
28 ; WINDOWS-NEXT: liveins: $x0
29 ; WINDOWS-NEXT: {{ $}}
30 ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:tcgpr64(p0) = COPY $x0
31 ; WINDOWS-NEXT: TCRETURNri [[COPY]](p0), 0, csr_aarch64_aapcs, implicit $sp
32 tail call void %func()
36 declare void @outgoing_args_fn(i32)
37 define void @test_outgoing_args(i32 %a) {
38 ; DARWIN-LABEL: name: test_outgoing_args
39 ; DARWIN: bb.1 (%ir-block.0):
40 ; DARWIN-NEXT: liveins: $w0
42 ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
43 ; DARWIN-NEXT: $w0 = COPY [[COPY]](s32)
44 ; DARWIN-NEXT: TCRETURNdi @outgoing_args_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $w0
45 ; WINDOWS-LABEL: name: test_outgoing_args
46 ; WINDOWS: bb.1 (%ir-block.0):
47 ; WINDOWS-NEXT: liveins: $w0
48 ; WINDOWS-NEXT: {{ $}}
49 ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
50 ; WINDOWS-NEXT: $w0 = COPY [[COPY]](s32)
51 ; WINDOWS-NEXT: TCRETURNdi @outgoing_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0
52 tail call void @outgoing_args_fn(i32 %a)
56 ; Verify that we create frame indices for memory arguments in tail calls.
57 ; We get a bunch of copies here which are unused and thus eliminated. So, let's
58 ; just focus on what matters, which is that we get a G_FRAME_INDEX.
59 declare void @outgoing_stack_args_fn(<4 x half>)
60 define void @test_outgoing_stack_args([8 x <2 x double>], <4 x half> %arg) {
61 ; DARWIN-LABEL: name: test_outgoing_stack_args
62 ; DARWIN: bb.1 (%ir-block.1):
63 ; DARWIN-NEXT: liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
65 ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
66 ; DARWIN-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
67 ; DARWIN-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
68 ; DARWIN-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
69 ; DARWIN-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
70 ; DARWIN-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
71 ; DARWIN-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
72 ; DARWIN-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
73 ; DARWIN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
74 ; DARWIN-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
75 ; DARWIN-NEXT: $d0 = COPY [[LOAD]](<4 x s16>)
76 ; DARWIN-NEXT: TCRETURNdi @outgoing_stack_args_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $d0
77 ; WINDOWS-LABEL: name: test_outgoing_stack_args
78 ; WINDOWS: bb.1 (%ir-block.1):
79 ; WINDOWS-NEXT: liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
80 ; WINDOWS-NEXT: {{ $}}
81 ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
82 ; WINDOWS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
83 ; WINDOWS-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
84 ; WINDOWS-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
85 ; WINDOWS-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
86 ; WINDOWS-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
87 ; WINDOWS-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
88 ; WINDOWS-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
89 ; WINDOWS-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
90 ; WINDOWS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
91 ; WINDOWS-NEXT: $d0 = COPY [[LOAD]](<4 x s16>)
92 ; WINDOWS-NEXT: TCRETURNdi @outgoing_stack_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $d0
93 tail call void @outgoing_stack_args_fn(<4 x half> %arg)
97 ; Verify that we don't tail call when the arguments cannot fit on the caller's stack.
99 declare i32 @too_big_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i8 %c, i16 %s)
100 define i32 @test_too_big_stack() {
101 ; DARWIN-LABEL: name: test_too_big_stack
102 ; DARWIN: bb.1.entry:
103 ; DARWIN-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
104 ; DARWIN-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 8
105 ; DARWIN-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
106 ; DARWIN-NEXT: ADJCALLSTACKDOWN 4, 0, implicit-def $sp, implicit $sp
107 ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
108 ; DARWIN-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
109 ; DARWIN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
110 ; DARWIN-NEXT: G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
111 ; DARWIN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
112 ; DARWIN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
113 ; DARWIN-NEXT: G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 2, align 1)
114 ; DARWIN-NEXT: $x0 = COPY [[DEF]](s64)
115 ; DARWIN-NEXT: $x1 = COPY [[DEF]](s64)
116 ; DARWIN-NEXT: $x2 = COPY [[DEF]](s64)
117 ; DARWIN-NEXT: $x3 = COPY [[DEF]](s64)
118 ; DARWIN-NEXT: $x4 = COPY [[DEF]](s64)
119 ; DARWIN-NEXT: $x5 = COPY [[DEF]](s64)
120 ; DARWIN-NEXT: $x6 = COPY [[DEF]](s64)
121 ; DARWIN-NEXT: $x7 = COPY [[DEF]](s64)
122 ; DARWIN-NEXT: BL @too_big_stack, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit-def $w0
123 ; DARWIN-NEXT: ADJCALLSTACKUP 4, 0, implicit-def $sp, implicit $sp
124 ; DARWIN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
125 ; DARWIN-NEXT: $w0 = COPY [[COPY1]](s32)
126 ; DARWIN-NEXT: RET_ReallyLR implicit $w0
127 ; WINDOWS-LABEL: name: test_too_big_stack
128 ; WINDOWS: bb.1.entry:
129 ; WINDOWS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
130 ; WINDOWS-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 8
131 ; WINDOWS-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
132 ; WINDOWS-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
133 ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
134 ; WINDOWS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
135 ; WINDOWS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
136 ; WINDOWS-NEXT: G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
137 ; WINDOWS-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
138 ; WINDOWS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
139 ; WINDOWS-NEXT: G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 8, align 1)
140 ; WINDOWS-NEXT: $x0 = COPY [[DEF]](s64)
141 ; WINDOWS-NEXT: $x1 = COPY [[DEF]](s64)
142 ; WINDOWS-NEXT: $x2 = COPY [[DEF]](s64)
143 ; WINDOWS-NEXT: $x3 = COPY [[DEF]](s64)
144 ; WINDOWS-NEXT: $x4 = COPY [[DEF]](s64)
145 ; WINDOWS-NEXT: $x5 = COPY [[DEF]](s64)
146 ; WINDOWS-NEXT: $x6 = COPY [[DEF]](s64)
147 ; WINDOWS-NEXT: $x7 = COPY [[DEF]](s64)
148 ; WINDOWS-NEXT: BL @too_big_stack, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit-def $w0
149 ; WINDOWS-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
150 ; WINDOWS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
151 ; WINDOWS-NEXT: $w0 = COPY [[COPY1]](s32)
152 ; WINDOWS-NEXT: RET_ReallyLR implicit $w0
154 %call = tail call i32 @too_big_stack(i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i8 8, i16 9)
158 ; Callees with nonvoid return types can be tail called as well: the checks
159 ; below expect a bare TCRETURNdi with no COPY of the result after the call.
160 ; (This was once unsupported because call lowering inserted COPYs after the call.)
161 declare i32 @nonvoid_ret()
162 define i32 @test_nonvoid_ret() {
163 ; DARWIN-LABEL: name: test_nonvoid_ret
164 ; DARWIN: bb.1 (%ir-block.0):
165 ; DARWIN-NEXT: TCRETURNdi @nonvoid_ret, 0, csr_darwin_aarch64_aapcs, implicit $sp
166 ; WINDOWS-LABEL: name: test_nonvoid_ret
167 ; WINDOWS: bb.1 (%ir-block.0):
168 ; WINDOWS-NEXT: TCRETURNdi @nonvoid_ret, 0, csr_aarch64_aapcs, implicit $sp
169 %call = tail call i32 @nonvoid_ret()
173 declare void @varargs(i32, double, i64, ...)
174 define void @test_varargs() {
175 ; DARWIN-LABEL: name: test_varargs
176 ; DARWIN: bb.1 (%ir-block.0):
177 ; DARWIN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
178 ; DARWIN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
179 ; DARWIN-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
180 ; DARWIN-NEXT: $w0 = COPY [[C]](s32)
181 ; DARWIN-NEXT: $d0 = COPY [[C1]](s64)
182 ; DARWIN-NEXT: $x1 = COPY [[C2]](s64)
183 ; DARWIN-NEXT: TCRETURNdi @varargs, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1
184 ; WINDOWS-LABEL: name: test_varargs
185 ; WINDOWS: bb.1 (%ir-block.0):
186 ; WINDOWS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
187 ; WINDOWS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
188 ; WINDOWS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
189 ; WINDOWS-NEXT: $w0 = COPY [[C]](s32)
190 ; WINDOWS-NEXT: $x1 = COPY [[C1]](s64)
191 ; WINDOWS-NEXT: $x2 = COPY [[C2]](s64)
192 ; WINDOWS-NEXT: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2
193 tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12)
197 ; Darwin should not tail call here, because the last parameter to @varargs is
198 ; not fixed. So, it's passed on the stack, which will make us not fit. On
199 ; Windows, it's passed in a register, so it's safe to tail call.
200 define void @test_varargs_2() {
202 ; DARWIN-LABEL: name: test_varargs_2
203 ; DARWIN: bb.1 (%ir-block.0):
204 ; DARWIN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
205 ; DARWIN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
206 ; DARWIN-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
207 ; DARWIN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
208 ; DARWIN-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def $sp, implicit $sp
209 ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
210 ; DARWIN-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
211 ; DARWIN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
212 ; DARWIN-NEXT: G_STORE [[C3]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1)
213 ; DARWIN-NEXT: $w0 = COPY [[C]](s32)
214 ; DARWIN-NEXT: $d0 = COPY [[C1]](s64)
215 ; DARWIN-NEXT: $x1 = COPY [[C2]](s64)
216 ; DARWIN-NEXT: BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
217 ; DARWIN-NEXT: ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
218 ; DARWIN-NEXT: RET_ReallyLR
219 ; WINDOWS-LABEL: name: test_varargs_2
220 ; WINDOWS: bb.1 (%ir-block.0):
221 ; WINDOWS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
222 ; WINDOWS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
223 ; WINDOWS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
224 ; WINDOWS-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
225 ; WINDOWS-NEXT: $w0 = COPY [[C]](s32)
226 ; WINDOWS-NEXT: $x1 = COPY [[C1]](s64)
227 ; WINDOWS-NEXT: $x2 = COPY [[C2]](s64)
228 ; WINDOWS-NEXT: $x3 = COPY [[C3]](s64)
229 ; WINDOWS-NEXT: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2, implicit $x3
230 tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i64 314)
234 ; Same deal here, even though we have enough room to fit. On Darwin, we'll pass
235 ; the last argument to @varargs on the stack. We don't allow tail calling
236 ; varargs arguments that are on the stack.
237 define void @test_varargs_3([8 x <2 x double>], <4 x half> %arg) {
239 ; DARWIN-LABEL: name: test_varargs_3
240 ; DARWIN: bb.1 (%ir-block.1):
241 ; DARWIN-NEXT: liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
242 ; DARWIN-NEXT: {{ $}}
243 ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
244 ; DARWIN-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
245 ; DARWIN-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
246 ; DARWIN-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
247 ; DARWIN-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
248 ; DARWIN-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
249 ; DARWIN-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
250 ; DARWIN-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
251 ; DARWIN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
252 ; DARWIN-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
253 ; DARWIN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
254 ; DARWIN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
255 ; DARWIN-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
256 ; DARWIN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
257 ; DARWIN-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def $sp, implicit $sp
258 ; DARWIN-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY $sp
259 ; DARWIN-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
260 ; DARWIN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C4]](s64)
261 ; DARWIN-NEXT: G_STORE [[C3]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1)
262 ; DARWIN-NEXT: $w0 = COPY [[C]](s32)
263 ; DARWIN-NEXT: $d0 = COPY [[C1]](s64)
264 ; DARWIN-NEXT: $x1 = COPY [[C2]](s64)
265 ; DARWIN-NEXT: BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
266 ; DARWIN-NEXT: ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
267 ; DARWIN-NEXT: RET_ReallyLR
268 ; WINDOWS-LABEL: name: test_varargs_3
269 ; WINDOWS: bb.1 (%ir-block.1):
270 ; WINDOWS-NEXT: liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
271 ; WINDOWS-NEXT: {{ $}}
272 ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
273 ; WINDOWS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
274 ; WINDOWS-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
275 ; WINDOWS-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
276 ; WINDOWS-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
277 ; WINDOWS-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
278 ; WINDOWS-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
279 ; WINDOWS-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
280 ; WINDOWS-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
281 ; WINDOWS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
282 ; WINDOWS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
283 ; WINDOWS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
284 ; WINDOWS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
285 ; WINDOWS-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
286 ; WINDOWS-NEXT: $w0 = COPY [[C]](s32)
287 ; WINDOWS-NEXT: $x1 = COPY [[C1]](s64)
288 ; WINDOWS-NEXT: $x2 = COPY [[C2]](s64)
289 ; WINDOWS-NEXT: $x3 = COPY [[C3]](s64)
290 ; WINDOWS-NEXT: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2, implicit $x3
291 tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i64 314)
295 ; Unsupported calling convention for tail calls. Make sure we never tail call it.
297 declare ghccc void @bad_call_conv_fn()
298 define void @test_bad_call_conv() {
299 ; DARWIN-LABEL: name: test_bad_call_conv
300 ; DARWIN: bb.1 (%ir-block.0):
301 ; DARWIN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
302 ; DARWIN-NEXT: BL @bad_call_conv_fn, csr_aarch64_noregs, implicit-def $lr, implicit $sp
303 ; DARWIN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
304 ; DARWIN-NEXT: RET_ReallyLR
305 ; WINDOWS-LABEL: name: test_bad_call_conv
306 ; WINDOWS: bb.1 (%ir-block.0):
307 ; WINDOWS-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
308 ; WINDOWS-NEXT: BL @bad_call_conv_fn, csr_aarch64_noregs, implicit-def $lr, implicit $sp
309 ; WINDOWS-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
310 ; WINDOWS-NEXT: RET_ReallyLR
311 tail call ghccc void @bad_call_conv_fn()
315 ; Shouldn't tail call when the caller has byval arguments.
316 define void @test_byval(ptr byval(i8) %ptr) {
317 ; DARWIN-LABEL: name: test_byval
318 ; DARWIN: bb.1 (%ir-block.0):
319 ; DARWIN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
320 ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
321 ; DARWIN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
322 ; DARWIN-NEXT: BL @simple_fn, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
323 ; DARWIN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
324 ; DARWIN-NEXT: RET_ReallyLR
325 ; WINDOWS-LABEL: name: test_byval
326 ; WINDOWS: bb.1 (%ir-block.0):
327 ; WINDOWS-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
328 ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
329 ; WINDOWS-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
330 ; WINDOWS-NEXT: BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
331 ; WINDOWS-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
332 ; WINDOWS-NEXT: RET_ReallyLR
333 tail call void @simple_fn()
337 ; Shouldn't tail call when the caller has inreg arguments.
338 define void @test_inreg(ptr inreg %ptr) {
339 ; DARWIN-LABEL: name: test_inreg
340 ; DARWIN: bb.1 (%ir-block.0):
341 ; DARWIN-NEXT: liveins: $x0
342 ; DARWIN-NEXT: {{ $}}
343 ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
344 ; DARWIN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
345 ; DARWIN-NEXT: BL @simple_fn, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
346 ; DARWIN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
347 ; DARWIN-NEXT: RET_ReallyLR
348 ; WINDOWS-LABEL: name: test_inreg
349 ; WINDOWS: bb.1 (%ir-block.0):
350 ; WINDOWS-NEXT: liveins: $x0
351 ; WINDOWS-NEXT: {{ $}}
352 ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
353 ; WINDOWS-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
354 ; WINDOWS-NEXT: BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
355 ; WINDOWS-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
356 ; WINDOWS-NEXT: RET_ReallyLR
357 tail call void @simple_fn()
361 declare fastcc void @fast_fn()
362 define void @test_mismatched_caller() {
363 ; DARWIN-LABEL: name: test_mismatched_caller
364 ; DARWIN: bb.1 (%ir-block.0):
365 ; DARWIN-NEXT: TCRETURNdi @fast_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp
366 ; WINDOWS-LABEL: name: test_mismatched_caller
367 ; WINDOWS: bb.1 (%ir-block.0):
368 ; WINDOWS-NEXT: TCRETURNdi @fast_fn, 0, csr_aarch64_aapcs, implicit $sp
369 tail call fastcc void @fast_fn()
373 ; Verify that lifetime markers and llvm.assume don't impact tail calling.
374 declare void @llvm.assume(i1)
375 define void @test_assume() local_unnamed_addr {
376 ; DARWIN-LABEL: name: test_assume
377 ; DARWIN: bb.1.entry:
378 ; DARWIN-NEXT: TCRETURNdi @nonvoid_ret, 0, csr_darwin_aarch64_aapcs, implicit $sp
379 ; WINDOWS-LABEL: name: test_assume
380 ; WINDOWS: bb.1.entry:
381 ; WINDOWS-NEXT: TCRETURNdi @nonvoid_ret, 0, csr_aarch64_aapcs, implicit $sp
383 %x = tail call i32 @nonvoid_ret()
384 %y = icmp ne i32 %x, 0
385 tail call void @llvm.assume(i1 %y)
389 declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
390 declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
391 define void @test_lifetime() local_unnamed_addr {
392 ; DARWIN-LABEL: name: test_lifetime
393 ; DARWIN: bb.1.entry:
394 ; DARWIN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.t
395 ; DARWIN-NEXT: LIFETIME_START %stack.0.t
396 ; DARWIN-NEXT: TCRETURNdi @nonvoid_ret, 0, csr_darwin_aarch64_aapcs, implicit $sp
397 ; WINDOWS-LABEL: name: test_lifetime
398 ; WINDOWS: bb.1.entry:
399 ; WINDOWS-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.t
400 ; WINDOWS-NEXT: LIFETIME_START %stack.0.t
401 ; WINDOWS-NEXT: TCRETURNdi @nonvoid_ret, 0, csr_aarch64_aapcs, implicit $sp
403 %t = alloca i8, align 1
404 call void @llvm.lifetime.start.p0(i64 1, ptr %t)
405 %x = tail call i32 @nonvoid_ret()
406 %y = icmp ne i32 %x, 0
407 tail call void @llvm.lifetime.end.p0(i64 1, ptr %t)
411 ; We can tail call when the callee swiftself is the same as the caller one.
412 ; It would be nice to move this to swiftself.ll, but it's important to verify
413 ; that we get the COPY that makes this safe in the first place.
415 define hidden swiftcc i64 @swiftself_indirect_tail(ptr swiftself %arg) {
416 ; DARWIN-LABEL: name: swiftself_indirect_tail
417 ; DARWIN: bb.1 (%ir-block.0):
418 ; DARWIN-NEXT: liveins: $x20
419 ; DARWIN-NEXT: {{ $}}
420 ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x20
421 ; DARWIN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
422 ; DARWIN-NEXT: BL @pluto, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
423 ; DARWIN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
424 ; DARWIN-NEXT: [[COPY1:%[0-9]+]]:tcgpr64(p0) = COPY $x0
425 ; DARWIN-NEXT: $x20 = COPY [[COPY]](p0)
426 ; DARWIN-NEXT: TCRETURNri [[COPY1]](p0), 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x20
427 ; WINDOWS-LABEL: name: swiftself_indirect_tail
428 ; WINDOWS: bb.1 (%ir-block.0):
429 ; WINDOWS-NEXT: liveins: $x20
430 ; WINDOWS-NEXT: {{ $}}
431 ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x20
432 ; WINDOWS-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
433 ; WINDOWS-NEXT: BL @pluto, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
434 ; WINDOWS-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
435 ; WINDOWS-NEXT: [[COPY1:%[0-9]+]]:tcgpr64(p0) = COPY $x0
436 ; WINDOWS-NEXT: $x20 = COPY [[COPY]](p0)
437 ; WINDOWS-NEXT: TCRETURNri [[COPY1]](p0), 0, csr_aarch64_aapcs, implicit $sp, implicit $x20
438 %tmp = call ptr @pluto()
439 %tmp2 = tail call swiftcc i64 %tmp(ptr swiftself %arg)
443 ; Verify that we can tail call musttail callees.
444 declare void @must_callee(ptr)
445 define void @foo(ptr) {
446 ; DARWIN-LABEL: name: foo
447 ; DARWIN: bb.1 (%ir-block.1):
448 ; DARWIN-NEXT: liveins: $x0
449 ; DARWIN-NEXT: {{ $}}
450 ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
451 ; DARWIN-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
452 ; DARWIN-NEXT: $x0 = COPY [[C]](p0)
453 ; DARWIN-NEXT: TCRETURNdi @must_callee, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x0
454 ; WINDOWS-LABEL: name: foo
455 ; WINDOWS: bb.1 (%ir-block.1):
456 ; WINDOWS-NEXT: liveins: $x0
457 ; WINDOWS-NEXT: {{ $}}
458 ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
459 ; WINDOWS-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
460 ; WINDOWS-NEXT: $x0 = COPY [[C]](p0)
461 ; WINDOWS-NEXT: TCRETURNdi @must_callee, 0, csr_aarch64_aapcs, implicit $sp, implicit $x0
462 musttail call void @must_callee(ptr null)
466 ; Verify we emit a tail call with a type that requires splitting into
467 ; multiple registers.
468 declare void @outgoing_v16f16(<16 x half>)
469 define void @test_tail_call_outgoing_v16f16(<16 x half> %arg) {
470 ; DARWIN-LABEL: name: test_tail_call_outgoing_v16f16
471 ; DARWIN: bb.1 (%ir-block.0):
472 ; DARWIN-NEXT: liveins: $q0, $q1
473 ; DARWIN-NEXT: {{ $}}
474 ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
475 ; DARWIN-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
476 ; DARWIN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<8 x s16>), [[COPY1]](<8 x s16>)
477 ; DARWIN-NEXT: [[UV:%[0-9]+]]:_(<8 x s16>), [[UV1:%[0-9]+]]:_(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>)
478 ; DARWIN-NEXT: $q0 = COPY [[UV]](<8 x s16>)
479 ; DARWIN-NEXT: $q1 = COPY [[UV1]](<8 x s16>)
480 ; DARWIN-NEXT: TCRETURNdi @outgoing_v16f16, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1
481 ; WINDOWS-LABEL: name: test_tail_call_outgoing_v16f16
482 ; WINDOWS: bb.1 (%ir-block.0):
483 ; WINDOWS-NEXT: liveins: $q0, $q1
484 ; WINDOWS-NEXT: {{ $}}
485 ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
486 ; WINDOWS-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
487 ; WINDOWS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<8 x s16>), [[COPY1]](<8 x s16>)
488 ; WINDOWS-NEXT: [[UV:%[0-9]+]]:_(<8 x s16>), [[UV1:%[0-9]+]]:_(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>)
489 ; WINDOWS-NEXT: $q0 = COPY [[UV]](<8 x s16>)
490 ; WINDOWS-NEXT: $q1 = COPY [[UV1]](<8 x s16>)
491 ; WINDOWS-NEXT: TCRETURNdi @outgoing_v16f16, 0, csr_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1
492 tail call void @outgoing_v16f16(<16 x half> %arg)