1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
4 declare void @callee_stack0()
5 declare void @callee_stack8([8 x i64], i64)
6 declare void @callee_stack16([8 x i64], i64, i64)
8 define dso_local void @caller_to0_from0() nounwind {
9 ; CHECK-LABEL: caller_to0_from0:
11 ; CHECK-NEXT: b callee_stack0
; Neither caller nor callee passes arguments on the stack, so this must
; lower to a true tail call: a bare "b" to the callee, no bl/ret pair.
12 tail call void @callee_stack0()
16 define dso_local void @caller_to0_from8([8 x i64], i64) nounwind{
17 ; CHECK-LABEL: caller_to0_from8:
19 ; CHECK-NEXT: b callee_stack0
; The caller receives a stack argument but the callee needs none;
; shrinking the outgoing argument area is safe, so this is still a
; tail call ("b", not "bl").
21 tail call void @callee_stack0()
25 define dso_local void @caller_to8_from0() {
26 ; CHECK-LABEL: caller_to8_from0:
28 ; CHECK-NEXT: sub sp, sp, #32
29 ; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
30 ; CHECK-NEXT: .cfi_def_cfa_offset 32
31 ; CHECK-NEXT: .cfi_offset w30, -16
32 ; CHECK-NEXT: mov w8, #42
33 ; CHECK-NEXT: str x8, [sp]
34 ; CHECK-NEXT: bl callee_stack8
35 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
36 ; CHECK-NEXT: add sp, sp, #32
; Callee needs 8 bytes of stack-passed arguments but this caller has no
; incoming stack-argument area to reuse, so a frame must be created and
; the call emitted as "bl" with a normal epilogue.
39 ; Caller isn't going to clean up any extra stack we allocate, so it
40 ; can't be a tail call.
41 tail call void @callee_stack8([8 x i64] undef, i64 42)
45 define dso_local void @caller_to8_from8([8 x i64], i64 %a) {
46 ; CHECK-LABEL: caller_to8_from8:
48 ; CHECK-NEXT: mov w8, #42
49 ; CHECK-NEXT: str x8, [sp]
50 ; CHECK-NEXT: b callee_stack8
; Incoming and outgoing stack-argument areas are the same size (8
; bytes), so the 42 is stored over our own incoming slot and the call
; is a plain "b" tail call.
52 ; This should reuse our stack area for the 42
53 tail call void @callee_stack8([8 x i64] undef, i64 42)
57 define dso_local void @caller_to16_from8([8 x i64], i64 %a) {
58 ; CHECK-LABEL: caller_to16_from8:
60 ; CHECK-NEXT: sub sp, sp, #32
61 ; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
62 ; CHECK-NEXT: .cfi_def_cfa_offset 32
63 ; CHECK-NEXT: .cfi_offset w30, -16
64 ; CHECK-NEXT: bl callee_stack16
65 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
66 ; CHECK-NEXT: add sp, sp, #32
; Callee needs 16 bytes of stack arguments but we only have 8 incoming;
; growing into the caller's frame is illegal, so "bl" is required.
69 ; Shouldn't be a tail call: we can't use SP+8 because our caller might
70 ; have something there. This may sound obvious but implementation does
71 ; some funky aligning.
72 tail call void @callee_stack16([8 x i64] undef, i64 undef, i64 undef)
76 define dso_local void @caller_to8_from24([8 x i64], i64 %a, i64 %b, i64 %c) {
77 ; CHECK-LABEL: caller_to8_from24:
79 ; CHECK-NEXT: mov w8, #42
80 ; CHECK-NEXT: str x8, [sp]
81 ; CHECK-NEXT: b callee_stack8
; Incoming area (24 bytes) is larger than the callee's (8 bytes), so
; the outgoing argument fits inside it and a "b" tail call is emitted.
83 ; Reuse our area, putting "42" at incoming sp
84 tail call void @callee_stack8([8 x i64] undef, i64 42)
88 define dso_local void @caller_to16_from16([8 x i64], i64 %a, i64 %b) {
89 ; CHECK-LABEL: caller_to16_from16:
91 ; CHECK-NEXT: ldp x8, x9, [sp]
92 ; CHECK-NEXT: stp x9, x8, [sp]
93 ; CHECK-NEXT: b callee_stack16
; Swapping the two stack arguments in place: the ldp must complete
; before the stp, otherwise one value overwrites the other.
95 ; Here we want to make sure that both loads happen before the stores:
96 ; otherwise either %a or %b will be wrongly clobbered.
97 tail call void @callee_stack16([8 x i64] undef, i64 %b, i64 %a)
103 @func = dso_local global ptr null
105 define dso_local void @indirect_tail() {
106 ; CHECK-LABEL: indirect_tail:
108 ; CHECK-NEXT: adrp x8, func
109 ; CHECK-NEXT: mov w0, #42
110 ; CHECK-NEXT: ldr x1, [x8, :lo12:func]
113 %fptr = load ptr, ptr @func
114 tail call void %fptr(i32 42)