1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
5 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
7 ; Several cases where tail call elimination should move the load above the call,
8 ; then eliminate the tail recursion.
12 @global = external global i32 ; <i32*> [#uses=1]
13 @extern_weak_global = extern_weak global i32 ; <i32*> [#uses=1]
16 ; This load can be moved above the call because the function won't write to it
17 ; and the call has no side effects.
18 define fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly willreturn {
19 ; CHECK-LABEL: @raise_load_1(
21 ; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
23 ; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[ELSE:%.*]] ]
24 ; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[ELSE]] ]
25 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]]
26 ; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE]]
28 ; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
29 ; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]]
31 ; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1
32 ; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ARG:%.*]], align 4
33 ; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]]
34 ; CHECK-NEXT: br label [[TAILRECURSE]]
37 %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
38 br i1 %tmp2, label %if, label %else
43 else: ; preds = %entry
44 %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
45 %tmp8 = call fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
46 %tmp9 = load i32, i32* %a_arg ; <i32> [#uses=1]
47 %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
52 ; This load can be moved above the call because the function won't write to it
53 ; and the load provably can't trap.
54 define fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
55 ; CHECK-LABEL: @raise_load_2(
57 ; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
59 ; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[RECURSE:%.*]] ]
60 ; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[RECURSE]] ]
61 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]]
62 ; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE:%.*]]
64 ; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
65 ; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]]
67 ; CHECK-NEXT: [[NULLCHECK:%.*]] = icmp eq i32* [[A_ARG:%.*]], null
68 ; CHECK-NEXT: br i1 [[NULLCHECK]], label [[UNWIND:%.*]], label [[RECURSE]]
70 ; CHECK-NEXT: unreachable
72 ; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1
73 ; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* @global, align 4
74 ; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]]
75 ; CHECK-NEXT: br label [[TAILRECURSE]]
78 %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
79 br i1 %tmp2, label %if, label %else
84 else: ; preds = %entry
85 %nullcheck = icmp eq i32* %a_arg, null ; <i1> [#uses=1]
86 br i1 %nullcheck, label %unwind, label %recurse
88 unwind: ; preds = %else
91 recurse: ; preds = %else
92 %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
93 %tmp8 = call fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
94 %tmp9 = load i32, i32* @global ; <i32> [#uses=1]
95 %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
100 ; This load can be safely moved above the call (even though it's from an
101 ; extern_weak global) because the call has no side effects.
102 define fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly willreturn {
103 ; CHECK-LABEL: @raise_load_3(
105 ; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
106 ; CHECK: tailrecurse:
107 ; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[ELSE:%.*]] ]
108 ; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[ELSE]] ]
109 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]]
110 ; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE]]
112 ; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
113 ; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]]
115 ; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1
116 ; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* @extern_weak_global, align 4
117 ; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]]
118 ; CHECK-NEXT: br label [[TAILRECURSE]]
121 %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
122 br i1 %tmp2, label %if, label %else
127 else: ; preds = %entry
128 %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
129 %tmp8 = call fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
130 %tmp9 = load i32, i32* @extern_weak_global ; <i32> [#uses=1]
131 %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
136 ; The second load can be safely moved above the call even though it's from an
137 ; unknown pointer (which normally means it might trap) because the first load
138 ; proves it doesn't trap.
139 define fastcc i32 @raise_load_4(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
140 ; CHECK-LABEL: @raise_load_4(
142 ; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
143 ; CHECK: tailrecurse:
144 ; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[RECURSE:%.*]] ]
145 ; CHECK-NEXT: [[A_LEN_ARG_TR:%.*]] = phi i32 [ [[A_LEN_ARG:%.*]], [[ENTRY]] ], [ [[FIRST:%.*]], [[RECURSE]] ]
146 ; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[RECURSE]] ]
147 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG_TR]]
148 ; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE:%.*]]
150 ; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
151 ; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]]
153 ; CHECK-NEXT: [[NULLCHECK:%.*]] = icmp eq i32* [[A_ARG:%.*]], null
154 ; CHECK-NEXT: br i1 [[NULLCHECK]], label [[UNWIND:%.*]], label [[RECURSE]]
156 ; CHECK-NEXT: unreachable
158 ; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1
159 ; CHECK-NEXT: [[FIRST]] = load i32, i32* [[A_ARG]], align 4
160 ; CHECK-NEXT: [[SECOND:%.*]] = load i32, i32* [[A_ARG]], align 4
161 ; CHECK-NEXT: [[TMP10]] = add i32 [[SECOND]], [[ACCUMULATOR_TR]]
162 ; CHECK-NEXT: br label [[TAILRECURSE]]
165 %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
166 br i1 %tmp2, label %if, label %else
171 else: ; preds = %entry
172 %nullcheck = icmp eq i32* %a_arg, null ; <i1> [#uses=1]
173 br i1 %nullcheck, label %unwind, label %recurse
175 unwind: ; preds = %else
178 recurse: ; preds = %else
179 %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
180 %first = load i32, i32* %a_arg ; <i32> [#uses=1]
181 %tmp8 = call fastcc i32 @raise_load_4(i32* %a_arg, i32 %first, i32 %tmp7) ; <i32> [#uses=1]
182 %second = load i32, i32* %a_arg ; <i32> [#uses=1]
183 %tmp10 = add i32 %second, %tmp8 ; <i32> [#uses=1]
187 ; This load can be moved above the call because the function won't write to it
188 ; and the a_arg is dereferenceable.
189 define fastcc i32 @raise_load_5(i32* dereferenceable(4) align 4 %a_arg, i32 %a_len_arg, i32 %start_arg) readonly nofree nosync {
190 ; CHECK-LABEL: @raise_load_5(
192 ; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
193 ; CHECK: tailrecurse:
194 ; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[ELSE:%.*]] ]
195 ; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[ELSE]] ]
196 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]]
197 ; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE]]
199 ; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
200 ; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]]
202 ; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1
203 ; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ARG:%.*]], align 4
204 ; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]]
205 ; CHECK-NEXT: br label [[TAILRECURSE]]
208 %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
209 br i1 %tmp2, label %if, label %else
214 else: ; preds = %entry
215 %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
216 %tmp8 = call fastcc i32 @raise_load_5(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
217 %tmp9 = load i32, i32* %a_arg ; <i32> [#uses=1]
218 %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
222 ; This load can be moved above the call because the function call does not write to the memory the load
223 ; is accessing and the load is safe to speculate.
224 define fastcc i32 @raise_load_6(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind {
225 ; CHECK-LABEL: @raise_load_6(
227 ; CHECK-NEXT: [[S:%.*]] = alloca i32, align 4
228 ; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
229 ; CHECK: tailrecurse:
230 ; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[ELSE:%.*]] ]
231 ; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[ELSE]] ]
232 ; CHECK-NEXT: store i32 4, i32* [[S]], align 4
233 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]]
234 ; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE]]
236 ; CHECK-NEXT: store i32 1, i32* [[A_ARG:%.*]], align 4
237 ; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
238 ; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]]
240 ; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1
241 ; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[S]], align 4
242 ; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]]
243 ; CHECK-NEXT: br label [[TAILRECURSE]]
248 %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
249 br i1 %tmp2, label %if, label %else
252 store i32 1, i32* %a_arg
255 else: ; preds = %entry
256 %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
257 %tmp8 = call fastcc i32 @raise_load_6(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
258 %tmp9 = load i32, i32* %s ; <i32> [#uses=1]
259 %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]