1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple riscv32-unknown-linux-gnu -o - %s | FileCheck %s
3 ; RUN: llc -mtriple riscv32-unknown-elf -o - %s | FileCheck %s
5 ; Perform tail call optimization for global address.
; Baseline case: one i32 argument forwarded unchanged to an external function.
; The assertions expect `tail callee_tail` as the first instruction, with no
; stack adjustment or argument moves before it.
6 declare i32 @callee_tail(i32 %i)
7 define i32 @caller_tail(i32 %i) nounwind {
8 ; CHECK-LABEL: caller_tail:
9 ; CHECK: # %bb.0: # %entry
10 ; CHECK-NEXT: tail callee_tail
12 %r = tail call i32 @callee_tail(i32 %i)
16 ; Perform tail call optimization for external symbol.
; The memcpy intrinsic copies 7 bytes from %src to @dest inside an `optsize`
; function. The assertions expect it to be emitted as `tail memcpy`: the
; address of dest is built in a1, then a0 (%src) and a1 are swapped through a3
; so the destination ends up in a0 and the source in a1, with the length 7 in
; a2.
17 @dest = global [2 x i8] zeroinitializer
18 declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1)
19 define void @caller_extern(ptr %src) optsize {
20 ; CHECK-LABEL: caller_extern:
21 ; CHECK: # %bb.0: # %entry
22 ; CHECK-NEXT: lui a1, %hi(dest)
23 ; CHECK-NEXT: addi a1, a1, %lo(dest)
24 ; CHECK-NEXT: li a2, 7
25 ; CHECK-NEXT: mv a3, a0
26 ; CHECK-NEXT: mv a0, a1
27 ; CHECK-NEXT: mv a1, a3
28 ; CHECK-NEXT: tail memcpy
30 tail call void @llvm.memcpy.p0.p0.i32(ptr @dest, ptr %src, i32 7, i1 false)
34 ; Perform tail call optimization for external symbol when the caller has a zero entry count in its profile data (!prof) instead of optsize.
; Same 7-byte memcpy as caller_extern, but the caller has no `optsize`
; attribute; it carries `!prof !14` instead, and !14 records a
; function_entry_count of 0. The expected instruction sequence is the same as
; for caller_extern apart from the symbol name: swap a0/a1 through a3, length
; in a2, then `tail memcpy`.
35 @dest_pgso = global [2 x i8] zeroinitializer
36 define void @caller_extern_pgso(ptr %src) !prof !14 {
37 ; CHECK-LABEL: caller_extern_pgso:
38 ; CHECK: # %bb.0: # %entry
39 ; CHECK-NEXT: lui a1, %hi(dest_pgso)
40 ; CHECK-NEXT: addi a1, a1, %lo(dest_pgso)
41 ; CHECK-NEXT: li a2, 7
42 ; CHECK-NEXT: mv a3, a0
43 ; CHECK-NEXT: mv a0, a1
44 ; CHECK-NEXT: mv a1, a3
45 ; CHECK-NEXT: tail memcpy
47 tail call void @llvm.memcpy.p0.p0.i32(ptr @dest_pgso, ptr %src, i32 7, i1 false)
51 ; Perform indirect tail call optimization (for function pointer call).
; The callee pointer is produced by a select on `%a == 0`:
; @callee_indirect1 when %a is zero, @callee_indirect2 otherwise. The
; assertions expect a branch on a0, with the address of callee_indirect2
; materialized in t1 on the fall-through path and the address of
; callee_indirect1 materialized in t1 at .LBB3_2.
52 declare void @callee_indirect1()
53 declare void @callee_indirect2()
54 define void @caller_indirect_tail(i32 %a) nounwind {
55 ; CHECK-LABEL: caller_indirect_tail:
56 ; CHECK: # %bb.0: # %entry
57 ; CHECK-NEXT: beqz a0, .LBB3_2
58 ; CHECK-NEXT: # %bb.1: # %entry
59 ; CHECK-NEXT: lui t1, %hi(callee_indirect2)
60 ; CHECK-NEXT: addi t1, t1, %lo(callee_indirect2)
62 ; CHECK-NEXT: .LBB3_2:
63 ; CHECK-NEXT: lui t1, %hi(callee_indirect1)
64 ; CHECK-NEXT: addi t1, t1, %lo(callee_indirect1)
69 %tobool = icmp eq i32 %a, 0
70 %callee = select i1 %tobool, ptr @callee_indirect1, ptr @callee_indirect2
71 tail call void %callee()
75 ; Make sure we don't use t0 as the source for jr as that is a hint to pop the
76 ; return address stack on some microarchitectures.
; %0 is the function pointer and is the first of eight parameters, so all of
; a0..a7 are occupied on entry. The assertions expect the pointer to be copied
; from a0 into t1 (not t0), after which the remaining seven arguments each move
; down one register (a1..a7 into a0..a6) for the callee.
77 define i32 @caller_indirect_no_t0(ptr %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
78 ; CHECK-LABEL: caller_indirect_no_t0:
80 ; CHECK-NEXT: mv t1, a0
81 ; CHECK-NEXT: mv a0, a1
82 ; CHECK-NEXT: mv a1, a2
83 ; CHECK-NEXT: mv a2, a3
84 ; CHECK-NEXT: mv a3, a4
85 ; CHECK-NEXT: mv a4, a5
86 ; CHECK-NEXT: mv a5, a6
87 ; CHECK-NEXT: mv a6, a7
89 %9 = tail call i32 %0(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7)
93 ; Do not tail call optimize functions with varargs passed by stack.
; The call passes nine i32 values (a, b, b, a, a, b, b, a, a) to a variadic
; callee. The assertions expect a normal `call` with ra spilled and reloaded
; around it: a0/a1 already hold the first two values, a2..a7 are filled from
; a0/a1, and the ninth value (%a, in a0) is stored to 0(sp) as the one
; stack-passed argument.
94 declare i32 @callee_varargs(i32, ...)
95 define void @caller_varargs(i32 %a, i32 %b) nounwind {
96 ; CHECK-LABEL: caller_varargs:
97 ; CHECK: # %bb.0: # %entry
98 ; CHECK-NEXT: addi sp, sp, -16
99 ; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
100 ; CHECK-NEXT: sw a0, 0(sp)
101 ; CHECK-NEXT: mv a2, a1
102 ; CHECK-NEXT: mv a3, a0
103 ; CHECK-NEXT: mv a4, a0
104 ; CHECK-NEXT: mv a5, a1
105 ; CHECK-NEXT: mv a6, a1
106 ; CHECK-NEXT: mv a7, a0
107 ; CHECK-NEXT: call callee_varargs
108 ; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
109 ; CHECK-NEXT: addi sp, sp, 16
112 %call = tail call i32 (i32, ...) @callee_varargs(i32 %a, i32 %b, i32 %b, i32 %a, i32 %a, i32 %b, i32 %b, i32 %a, i32 %a)
116 ; Do not tail call optimize if stack is used to pass parameters.
; Caller and callee both take fourteen i32 parameters, forwarded in the same
; order. The assertions expect a 32-byte frame and a normal `call`: six words
; are loaded from 32(sp)..52(sp), i.e. just above the new frame, and stored
; back at 0(sp)..20(sp) as the callee's stack-passed arguments.
; NOTE(review): those six words are presumably %i..%n, the parameters that do
; not fit in a0..a7 -- confirm against the calling convention.
117 declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n)
118 define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) nounwind {
119 ; CHECK-LABEL: caller_args:
120 ; CHECK: # %bb.0: # %entry
121 ; CHECK-NEXT: addi sp, sp, -32
122 ; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
123 ; CHECK-NEXT: lw t0, 32(sp)
124 ; CHECK-NEXT: lw t1, 36(sp)
125 ; CHECK-NEXT: lw t2, 40(sp)
126 ; CHECK-NEXT: lw t3, 44(sp)
127 ; CHECK-NEXT: lw t4, 48(sp)
128 ; CHECK-NEXT: lw t5, 52(sp)
129 ; CHECK-NEXT: sw t5, 20(sp)
130 ; CHECK-NEXT: sw t4, 16(sp)
131 ; CHECK-NEXT: sw t3, 12(sp)
132 ; CHECK-NEXT: sw t2, 8(sp)
133 ; CHECK-NEXT: sw t1, 4(sp)
134 ; CHECK-NEXT: sw t0, 0(sp)
135 ; CHECK-NEXT: call callee_args
136 ; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
137 ; CHECK-NEXT: addi sp, sp, 32
140 %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n)
144 ; Do not tail call optimize if parameters need to be passed indirectly.
; The callee takes a single fp128 argument, passed here as a constant. The
; assertions expect the 16-byte value to be written to 0(sp)..12(sp) -- only
; the top word is non-zero, built with `lui a0, 262128` (0x3FFF0000) -- and a0
; to be set to sp, so the callee receives the address of the value rather than
; the value itself. The call is expected to be a normal `call`, with ra
; spilled and reloaded around it.
145 declare i32 @callee_indirect_args(fp128 %a)
146 define void @caller_indirect_args() nounwind {
147 ; CHECK-LABEL: caller_indirect_args:
148 ; CHECK: # %bb.0: # %entry
149 ; CHECK-NEXT: addi sp, sp, -32
150 ; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
151 ; CHECK-NEXT: lui a0, 262128
152 ; CHECK-NEXT: sw a0, 12(sp)
153 ; CHECK-NEXT: sw zero, 8(sp)
154 ; CHECK-NEXT: sw zero, 4(sp)
155 ; CHECK-NEXT: mv a0, sp
156 ; CHECK-NEXT: sw zero, 0(sp)
157 ; CHECK-NEXT: call callee_indirect_args
158 ; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
159 ; CHECK-NEXT: addi sp, sp, 32
162 %call = tail call i32 @callee_indirect_args(fp128 0xL00000000000000003FFF000000000000)
166 ; Perform tail call optimization for external weak symbol.
; The callee is declared with extern_weak linkage and takes no arguments. The
; assertions expect `tail callee_weak` as the first instruction, the same shape
; as the plain external-function case.
167 declare extern_weak void @callee_weak()
168 define void @caller_weak() nounwind {
169 ; CHECK-LABEL: caller_weak:
170 ; CHECK: # %bb.0: # %entry
171 ; CHECK-NEXT: tail callee_weak
173 tail call void @callee_weak()
177 ; Exception-handling functions need a special set of instructions to indicate a
178 ; return to the hardware. Tail-calling another function would probably break this.
; The caller carries the "interrupt"="machine" attribute. The assertions
; expect a 64-byte frame in which ra, t0-t6 and a0-a7 (sixteen registers) are
; saved before a normal `call callee_irq` and restored in the same order
; afterwards, followed by the frame being released.
180 declare void @callee_irq()
181 define void @caller_irq() nounwind "interrupt"="machine" {
182 ; CHECK-LABEL: caller_irq:
183 ; CHECK: # %bb.0: # %entry
184 ; CHECK-NEXT: addi sp, sp, -64
185 ; CHECK-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
186 ; CHECK-NEXT: sw t0, 56(sp) # 4-byte Folded Spill
187 ; CHECK-NEXT: sw t1, 52(sp) # 4-byte Folded Spill
188 ; CHECK-NEXT: sw t2, 48(sp) # 4-byte Folded Spill
189 ; CHECK-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
190 ; CHECK-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
191 ; CHECK-NEXT: sw a2, 36(sp) # 4-byte Folded Spill
192 ; CHECK-NEXT: sw a3, 32(sp) # 4-byte Folded Spill
193 ; CHECK-NEXT: sw a4, 28(sp) # 4-byte Folded Spill
194 ; CHECK-NEXT: sw a5, 24(sp) # 4-byte Folded Spill
195 ; CHECK-NEXT: sw a6, 20(sp) # 4-byte Folded Spill
196 ; CHECK-NEXT: sw a7, 16(sp) # 4-byte Folded Spill
197 ; CHECK-NEXT: sw t3, 12(sp) # 4-byte Folded Spill
198 ; CHECK-NEXT: sw t4, 8(sp) # 4-byte Folded Spill
199 ; CHECK-NEXT: sw t5, 4(sp) # 4-byte Folded Spill
200 ; CHECK-NEXT: sw t6, 0(sp) # 4-byte Folded Spill
201 ; CHECK-NEXT: call callee_irq
202 ; CHECK-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
203 ; CHECK-NEXT: lw t0, 56(sp) # 4-byte Folded Reload
204 ; CHECK-NEXT: lw t1, 52(sp) # 4-byte Folded Reload
205 ; CHECK-NEXT: lw t2, 48(sp) # 4-byte Folded Reload
206 ; CHECK-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
207 ; CHECK-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
208 ; CHECK-NEXT: lw a2, 36(sp) # 4-byte Folded Reload
209 ; CHECK-NEXT: lw a3, 32(sp) # 4-byte Folded Reload
210 ; CHECK-NEXT: lw a4, 28(sp) # 4-byte Folded Reload
211 ; CHECK-NEXT: lw a5, 24(sp) # 4-byte Folded Reload
212 ; CHECK-NEXT: lw a6, 20(sp) # 4-byte Folded Reload
213 ; CHECK-NEXT: lw a7, 16(sp) # 4-byte Folded Reload
214 ; CHECK-NEXT: lw t3, 12(sp) # 4-byte Folded Reload
215 ; CHECK-NEXT: lw t4, 8(sp) # 4-byte Folded Reload
216 ; CHECK-NEXT: lw t5, 4(sp) # 4-byte Folded Reload
217 ; CHECK-NEXT: lw t6, 0(sp) # 4-byte Folded Reload
218 ; CHECK-NEXT: addi sp, sp, 64
221 tail call void @callee_irq()
225 ; Byval parameters hand the function a pointer directly into the stack area
226 ; we want to reuse during a tail call. Do not tail call optimize functions with byval parameters.
; The callee's only parameter is `ptr byval(ptr)`. The assertions expect a
; normal `call` with ra spilled and reloaded around it: the word at 8(sp) is
; copied to 4(sp), and a0 is set to sp+4 so the callee is handed the address
; of that copy.
228 declare i32 @callee_byval(ptr byval(ptr) %a)
229 define i32 @caller_byval() nounwind {
230 ; CHECK-LABEL: caller_byval:
231 ; CHECK: # %bb.0: # %entry
232 ; CHECK-NEXT: addi sp, sp, -16
233 ; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
234 ; CHECK-NEXT: lw a0, 8(sp)
235 ; CHECK-NEXT: sw a0, 4(sp)
236 ; CHECK-NEXT: addi a0, sp, 4
237 ; CHECK-NEXT: call callee_byval
238 ; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
239 ; CHECK-NEXT: addi sp, sp, 16
243 %r = tail call i32 @callee_byval(ptr byval(ptr) %a)
247 ; Do not tail call optimize if callee uses structret semantics.
; %struct.A wraps a single i32 and @a is a zero-initialized global of that
; type. The call passes @a as the callee's sret(%struct.A) argument; the caller
; itself has no sret parameter. The assertions expect the address of a to be
; built in a0 with lui/addi, followed by a normal `call callee_struct` with ra
; spilled and reloaded around it.
248 %struct.A = type { i32 }
249 @a = global %struct.A zeroinitializer
251 declare void @callee_struct(ptr sret(%struct.A) %a)
252 define void @caller_nostruct() nounwind {
253 ; CHECK-LABEL: caller_nostruct:
254 ; CHECK: # %bb.0: # %entry
255 ; CHECK-NEXT: addi sp, sp, -16
256 ; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
257 ; CHECK-NEXT: lui a0, %hi(a)
258 ; CHECK-NEXT: addi a0, a0, %lo(a)
259 ; CHECK-NEXT: call callee_struct
260 ; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
261 ; CHECK-NEXT: addi sp, sp, 16
264 tail call void @callee_struct(ptr sret(%struct.A) @a)
268 ; Do not tail call optimize if caller uses structret semantics.
; Mirror image of the callee-sret case: here the sret(%struct.A) parameter is
; on the caller, and the callee takes no arguments at all. The assertions
; expect a normal `call callee_nostruct` with ra spilled and reloaded around
; it.
269 declare void @callee_nostruct()
270 define void @caller_struct(ptr sret(%struct.A) %a) nounwind {
271 ; CHECK-LABEL: caller_struct:
272 ; CHECK: # %bb.0: # %entry
273 ; CHECK-NEXT: addi sp, sp, -16
274 ; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
275 ; CHECK-NEXT: call callee_nostruct
276 ; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
277 ; CHECK-NEXT: addi sp, sp, 16
280 tail call void @callee_nostruct()
284 ; Do not tail call optimize if disabled.
; Calls @callee_tail with the same single i32 argument as caller_tail, but the
; function carries "disable-tail-calls"="true". Although the IR call is still
; marked `tail`, the assertions expect a normal `call callee_tail` with ra
; spilled and reloaded around it.
285 define i32 @disable_tail_calls(i32 %i) nounwind "disable-tail-calls"="true" {
286 ; CHECK-LABEL: disable_tail_calls:
287 ; CHECK: # %bb.0: # %entry
288 ; CHECK-NEXT: addi sp, sp, -16
289 ; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
290 ; CHECK-NEXT: call callee_tail
291 ; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
292 ; CHECK-NEXT: addi sp, sp, 16
295 %rv = tail call i32 @callee_tail(i32 %i)
299 ; Duplicate returns to enable tail call optimizations.
; Three nested comparisons (%a == 0, %b == 0, %a > %b signed) select one of
; four calls -- @test, @test1, @test2 or @test3 -- and the four results are
; merged by a phi in the shared `return` block. The assertions expect each of
; the four calls to be emitted as its own `tail` instruction in its own block,
; rather than four calls funnelling into one common return sequence.
304 define i32 @duplicate_returns(i32 %a, i32 %b) nounwind {
305 ; CHECK-LABEL: duplicate_returns:
306 ; CHECK: # %bb.0: # %entry
307 ; CHECK-NEXT: beqz a0, .LBB14_4
308 ; CHECK-NEXT: # %bb.1: # %if.else
309 ; CHECK-NEXT: beqz a1, .LBB14_5
310 ; CHECK-NEXT: # %bb.2: # %if.else4
311 ; CHECK-NEXT: bge a1, a0, .LBB14_6
312 ; CHECK-NEXT: # %bb.3: # %if.then6
313 ; CHECK-NEXT: tail test2
314 ; CHECK-NEXT: .LBB14_4: # %if.then
315 ; CHECK-NEXT: tail test
316 ; CHECK-NEXT: .LBB14_5: # %if.then2
317 ; CHECK-NEXT: tail test1
318 ; CHECK-NEXT: .LBB14_6: # %if.else8
319 ; CHECK-NEXT: tail test3
; %a == 0 selects the call to @test.
321 %cmp = icmp eq i32 %a, 0
322 br i1 %cmp, label %if.then, label %if.else
324 if.then: ; preds = %entry
325 %call = tail call i32 @test()
328 if.else: ; preds = %entry
; %b == 0 selects the call to @test1.
329 %cmp1 = icmp eq i32 %b, 0
330 br i1 %cmp1, label %if.then2, label %if.else4
332 if.then2: ; preds = %if.else
333 %call3 = tail call i32 @test1()
336 if.else4: ; preds = %if.else
; Signed %a > %b selects @test2; the expected branch is the inverted form
; (`bge a1, a0`), which jumps to the @test3 block when %b >= %a.
337 %cmp5 = icmp sgt i32 %a, %b
338 br i1 %cmp5, label %if.then6, label %if.else8
340 if.then6: ; preds = %if.else4
341 %call7 = tail call i32 @test2()
344 if.else8: ; preds = %if.else4
345 %call9 = tail call i32 @test3()
348 return: ; preds = %if.else8, %if.then6, %if.then2, %if.then
; Single merge point for the four call results.
349 %retval = phi i32 [ %call, %if.then ], [ %call3, %if.then2 ], [ %call7, %if.then6 ], [ %call9, %if.else8 ]
; Module-level profile summary, attached through the "ProfileSummary" module
; flag. !1 lists the summary fields (!2..!9) and !10 holds the three
; DetailedSummary entries.
353 !llvm.module.flags = !{!0}
354 !0 = !{i32 1, !"ProfileSummary", !1}
355 !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
356 !2 = !{!"ProfileFormat", !"InstrProf"}
357 !3 = !{!"TotalCount", i64 10000}
358 !4 = !{!"MaxCount", i64 10}
359 !5 = !{!"MaxInternalCount", i64 1}
360 !6 = !{!"MaxFunctionCount", i64 1000}
361 !7 = !{!"NumCounts", i64 3}
362 !8 = !{!"NumFunctions", i64 3}
363 !9 = !{!"DetailedSummary", !10}
364 !10 = !{!11, !12, !13}
365 !11 = !{i32 10000, i64 100, i32 1}
366 !12 = !{i32 999000, i64 100, i32 1}
367 !13 = !{i32 999999, i64 1, i32 2}
; !14 is the !prof attachment on caller_extern_pgso: an entry count of zero.
368 !14 = !{!"function_entry_count", i64 0}