1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s
4 ; Check that a tail call from an SVE function to another SVE function
5 ; can use a tail-call, as the same registers will be preserved by the callee.
7 define <vscale x 4 x i32> @sve_caller_sve_callee() nounwind {
8 ; CHECK-LABEL: sve_caller_sve_callee:
10 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
11 ; CHECK-NEXT: addvl sp, sp, #-2
12 ; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill
13 ; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill
15 ; CHECK-NEXT: //NO_APP
16 ; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload
17 ; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload
18 ; CHECK-NEXT: addvl sp, sp, #2
19 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
20 ; CHECK-NEXT: b sve_callee
; Empty inline asm whose only effect is to clobber z9 and z10; this is what
; forces the z9/z10 spills and reloads expected by the assertions above.
21 tail call void asm sideeffect "", "~{z9},~{z10}"()
; The expected output ends in a plain branch (`b sve_callee`), i.e. the call
; is emitted as a real tail call after the clobbered registers are reloaded.
22 %call = tail call <vscale x 4 x i32> @sve_callee()
23 ret <vscale x 4 x i32> %call
; Variant in which the call to @sve_callee is marked fastcc. The expected
; output still ends in a plain branch (`b sve_callee`), i.e. a tail call.
26 define <vscale x 4 x i32> @sve_caller_sve_callee_fastcc() nounwind {
27 ; CHECK-LABEL: sve_caller_sve_callee_fastcc:
29 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
30 ; CHECK-NEXT: addvl sp, sp, #-2
31 ; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill
32 ; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill
34 ; CHECK-NEXT: //NO_APP
35 ; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload
36 ; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload
37 ; CHECK-NEXT: addvl sp, sp, #2
38 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
39 ; CHECK-NEXT: b sve_callee
; Empty inline asm whose only effect is to clobber z9 and z10; this is what
; forces the z9/z10 spills and reloads expected by the assertions above.
40 tail call void asm sideeffect "", "~{z9},~{z10}"()
; Note the fastcc marker on the call site; the callee declaration visible in
; this file carries no explicit calling convention.
41 %call = tail call fastcc <vscale x 4 x i32> @sve_callee()
42 ret <vscale x 4 x i32> %call
; External callee that takes no arguments and returns a scalable vector
; (<vscale x 4 x i32>); used as the tail-call target of the two tests above.
45 declare <vscale x 4 x i32> @sve_callee()
47 ; Check that a tail call from an SVE function to a non-SVE function
48 ; does not use a tail-call, because after the call many of the SVE
49 ; registers may be clobbered and need to be restored.
; NOTE(review): %arg is not referenced in the visible body; presumably it is
; only there to give the caller a scalable-vector type in its signature.
; TODO confirm.
50 define i32 @sve_caller_non_sve_callee(<vscale x 4 x i32> %arg) nounwind {
51 ; CHECK-LABEL: sve_caller_non_sve_callee:
53 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
54 ; CHECK-NEXT: addvl sp, sp, #-18
55 ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
56 ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
57 ; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
58 ; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
59 ; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
60 ; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
61 ; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
62 ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
63 ; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
64 ; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
65 ; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
66 ; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
67 ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
68 ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
69 ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
70 ; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
71 ; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
72 ; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
73 ; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
74 ; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
75 ; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
76 ; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
77 ; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
78 ; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
79 ; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
80 ; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
81 ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
82 ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
84 ; CHECK-NEXT: //NO_APP
85 ; CHECK-NEXT: bl non_sve_callee
86 ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
87 ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
88 ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
89 ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
90 ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
91 ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
92 ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
93 ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
94 ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
95 ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
96 ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
97 ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
98 ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
99 ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
100 ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
101 ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
102 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
103 ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
104 ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
105 ; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
106 ; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
107 ; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
108 ; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
109 ; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
110 ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
111 ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
112 ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
113 ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
114 ; CHECK-NEXT: addvl sp, sp, #18
115 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; Empty inline asm that names z9 and z10 as clobbered. The expected prologue
; above nevertheless spills all of z8-z23 and p4-p15, not just those two.
117 tail call void asm sideeffect "", "~{z9},~{z10}"()
; Although the IR marks this as a tail call, the expected output uses a normal
; call (`bl non_sve_callee`) followed by the z/p register reloads and the
; x29/x30 restore.
118 %call = tail call i32 @non_sve_callee()
122 ; Check that a fastcc tail call from an SVE function to a non-SVE function
123 ; does not use a tail-call, because after the call many of the SVE
124 ; registers may be clobbered and need to be restored.
; NOTE(review): %arg is not referenced in the visible body; presumably it is
; only there to give the caller a scalable-vector type in its signature.
; TODO confirm.
125 define i32 @sve_caller_non_sve_callee_fastcc(<vscale x 4 x i32> %arg) nounwind {
126 ; CHECK-LABEL: sve_caller_non_sve_callee_fastcc:
128 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
129 ; CHECK-NEXT: addvl sp, sp, #-18
130 ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
131 ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
132 ; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
133 ; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
134 ; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
135 ; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
136 ; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
137 ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
138 ; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
139 ; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
140 ; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
141 ; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
142 ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
143 ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
144 ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
145 ; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
146 ; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
147 ; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
148 ; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
149 ; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
150 ; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
151 ; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
152 ; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
153 ; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
154 ; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
155 ; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
156 ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
157 ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
159 ; CHECK-NEXT: //NO_APP
160 ; CHECK-NEXT: bl non_sve_callee
161 ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
162 ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
163 ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
164 ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
165 ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
166 ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
167 ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
168 ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
169 ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
170 ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
171 ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
172 ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
173 ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
174 ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
175 ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
176 ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
177 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
178 ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
179 ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
180 ; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
181 ; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
182 ; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
183 ; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
184 ; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
185 ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
186 ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
187 ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
188 ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
189 ; CHECK-NEXT: addvl sp, sp, #18
190 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; Empty inline asm that names z9 and z10 as clobbered. The expected prologue
; above nevertheless spills all of z8-z23 and p4-p15, not just those two.
192 tail call void asm sideeffect "", "~{z9},~{z10}"()
; The call site is marked both `tail` and `fastcc`, yet the expected output is
; still a normal call (`bl non_sve_callee`) followed by the z/p register
; reloads and the x29/x30 restore.
193 %call = tail call fastcc i32 @non_sve_callee()
; External callee that takes no arguments and returns a plain i32; its
; signature contains no scalable-vector types.
197 declare i32 @non_sve_callee()