1 ; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
3 ; ScalarEvolution should be able to understand the loop and eliminate the casts.
define void @foo(i32* nocapture %d, i32 %n) nounwind {
entry:
9 %0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1]
10 br i1 %0, label %bb.nph, label %return
bb.nph: ; preds = %entry
br label %bb
15 bb: ; preds = %bb1, %bb.nph
16 %i.02 = phi i32 [ %5, %bb1 ], [ 0, %bb.nph ] ; <i32> [#uses=2]
17 %p.01 = phi i8 [ %4, %bb1 ], [ -1, %bb.nph ] ; <i8> [#uses=2]
18 %1 = sext i8 %p.01 to i32 ; <i32> [#uses=1]
19 %2 = sext i32 %i.02 to i64 ; <i64> [#uses=1]
20 %3 = getelementptr i32, i32* %d, i64 %2 ; <i32*> [#uses=1]
21 store i32 %1, i32* %3, align 4
22 %4 = add i8 %p.01, 1 ; <i8> [#uses=1]
%5 = add i32 %i.02, 1 ; <i32> [#uses=2]
br label %bb1

bb1: ; preds = %bb
27 %6 = icmp slt i32 %5, %n ; <i1> [#uses=1]
28 br i1 %6, label %bb, label %bb1.return_crit_edge
bb1.return_crit_edge: ; preds = %bb1
br label %return

return: ; preds = %bb1.return_crit_edge, %entry
ret void
}
37 ; ScalarEvolution should be able to find the maximum tripcount
38 ; of this multiple-exit loop, and if it doesn't know the exact
39 ; count, it should say so.
42 ; CHECK: Loop %for.cond: <multiple exits> Unpredictable backedge-taken count.
43 ; CHECK: Loop %for.cond: max backedge-taken count is 5
45 @.str = private constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=2]
define i32 @main() nounwind {
entry:
br label %for.cond
51 for.cond: ; preds = %for.inc, %entry
52 %g_4.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ] ; <i32> [#uses=5]
53 %cmp = icmp slt i32 %g_4.0, 5 ; <i1> [#uses=1]
54 br i1 %cmp, label %for.body, label %for.end
56 for.body: ; preds = %for.cond
57 %conv = trunc i32 %g_4.0 to i16 ; <i16> [#uses=1]
58 %tobool.not = icmp eq i16 %conv, 0 ; <i1> [#uses=1]
59 %tobool3 = icmp ne i32 %g_4.0, 0 ; <i1> [#uses=1]
60 %or.cond = and i1 %tobool.not, %tobool3 ; <i1> [#uses=1]
61 br i1 %or.cond, label %for.end, label %for.inc
63 for.inc: ; preds = %for.body
%add = add nsw i32 %g_4.0, 1 ; <i32> [#uses=1]
br label %for.cond
67 for.end: ; preds = %for.body, %for.cond
%call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %g_4.0) nounwind ; <i32> [#uses=0]
ret i32 0
}
72 declare i32 @printf(i8*, ...)
define void @test(i8* %a, i32 %n) nounwind {
entry:
76 %cmp1 = icmp sgt i32 %n, 0
77 br i1 %cmp1, label %for.body.lr.ph, label %for.end
79 for.body.lr.ph: ; preds = %entry
%tmp = zext i32 %n to i64
br label %for.body
83 for.body: ; preds = %for.body, %for.body.lr.ph
84 %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.lr.ph ]
85 %arrayidx = getelementptr i8, i8* %a, i64 %indvar
86 store i8 0, i8* %arrayidx, align 1
87 %indvar.next = add i64 %indvar, 1
88 %exitcond = icmp ne i64 %indvar.next, %tmp
89 br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge
for.cond.for.end_crit_edge: ; preds = %for.body
br label %for.end

for.end: ; preds = %for.cond.for.end_crit_edge, %entry
ret void
}
98 ; CHECK: Determining loop execution counts for: @test
99 ; CHECK-NEXT: backedge-taken count is
100 ; CHECK-NEXT: max backedge-taken count is -1
102 ; PR19799: Indvars miscompile due to an incorrect max backedge taken count from SCEV.
103 ; CHECK-LABEL: @pr19799
104 ; CHECK: Loop %for.body.i: <multiple exits> Unpredictable backedge-taken count.
105 ; CHECK: Loop %for.body.i: max backedge-taken count is 1
106 @a = common global i32 0, align 4
define i32 @pr19799() {
entry:
store i32 -1, i32* @a, align 4
br label %for.body.i
113 for.body.i: ; preds = %for.cond.i, %entry
114 %storemerge1.i = phi i32 [ -1, %entry ], [ %add.i.i, %for.cond.i ]
115 %tobool.i = icmp eq i32 %storemerge1.i, 0
116 %add.i.i = add nsw i32 %storemerge1.i, 2
117 br i1 %tobool.i, label %bar.exit, label %for.cond.i
119 for.cond.i: ; preds = %for.body.i
120 store i32 %add.i.i, i32* @a, align 4
121 %cmp.i = icmp slt i32 %storemerge1.i, 0
122 br i1 %cmp.i, label %for.body.i, label %bar.exit
bar.exit: ; preds = %for.cond.i, %for.body.i
ret i32 0
}
128 ; PR18886: Indvars miscompile due to an incorrect max backedge taken count from SCEV.
129 ; CHECK-LABEL: @pr18886
130 ; CHECK: Loop %for.body: <multiple exits> Unpredictable backedge-taken count.
131 ; CHECK: Loop %for.body: max backedge-taken count is 3
132 @aa = global i64 0, align 8
define i32 @pr18886() {
entry:
store i64 -21, i64* @aa, align 8
br label %for.body

for.body:
140 %storemerge1 = phi i64 [ -21, %entry ], [ %add, %for.cond ]
141 %tobool = icmp eq i64 %storemerge1, 0
142 %add = add nsw i64 %storemerge1, 8
br i1 %tobool, label %return, label %for.cond

for.cond:
146 store i64 %add, i64* @aa, align 8
147 %cmp = icmp slt i64 %add, 9
br i1 %cmp, label %for.body, label %return

return:
%retval.0 = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
ret i32 %retval.0
}
155 ; Here we have a must-exit loop latch that is not computable and a
156 ; may-exit early exit that can only have one non-exiting iteration
157 ; before the check is forever skipped.
159 ; CHECK-LABEL: @cannot_compute_mustexit
160 ; CHECK: Loop %for.body.i: <multiple exits> Unpredictable backedge-taken count.
161 ; CHECK: Loop %for.body.i: Unpredictable max backedge-taken count.
162 @b = common global i32 0, align 4
define i32 @cannot_compute_mustexit() {
entry:
store i32 -1, i32* @a, align 4
br label %for.body.i
169 for.body.i: ; preds = %for.cond.i, %entry
170 %storemerge1.i = phi i32 [ -1, %entry ], [ %add.i.i, %for.cond.i ]
171 %tobool.i = icmp eq i32 %storemerge1.i, 0
172 %add.i.i = add nsw i32 %storemerge1.i, 2
173 br i1 %tobool.i, label %bar.exit, label %for.cond.i
175 for.cond.i: ; preds = %for.body.i
176 store i32 %add.i.i, i32* @a, align 4
177 %ld = load volatile i32, i32* @b
178 %cmp.i = icmp ne i32 %ld, 0
179 br i1 %cmp.i, label %for.body.i, label %bar.exit
bar.exit: ; preds = %for.cond.i, %for.body.i
ret i32 0
}
185 ; This loop has two must-exits, both of which dominate the latch. The
186 ; MaxBECount should be the minimum of them.
188 ; CHECK-LABEL: @two_mustexit
189 ; CHECK: Loop %for.body.i: <multiple exits> backedge-taken count is 1
190 ; CHECK: Loop %for.body.i: max backedge-taken count is 1
define i32 @two_mustexit() {
entry:
store i32 -1, i32* @a, align 4
br label %for.body.i
196 for.body.i: ; preds = %for.cond.i, %entry
197 %storemerge1.i = phi i32 [ -1, %entry ], [ %add.i.i, %for.cond.i ]
198 %tobool.i = icmp sgt i32 %storemerge1.i, 0
199 %add.i.i = add nsw i32 %storemerge1.i, 2
200 br i1 %tobool.i, label %bar.exit, label %for.cond.i
202 for.cond.i: ; preds = %for.body.i
203 store i32 %add.i.i, i32* @a, align 4
204 %cmp.i = icmp slt i32 %storemerge1.i, 3
205 br i1 %cmp.i, label %for.body.i, label %bar.exit
bar.exit: ; preds = %for.cond.i, %for.body.i
ret i32 0
}

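; The IV starts at 0 and steps by 1, and the exit compares the pre-increment
; value against %n & 7, so at most 7 backedges can be taken.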
211 ; CHECK-LABEL: @ne_max_trip_count_1
212 ; CHECK: Loop %for.body: max backedge-taken count is 7
define i32 @ne_max_trip_count_1(i32 %n) {
entry:
%masked = and i32 %n, 7
br label %for.body

for.body:
219 %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
220 %add = add nsw i32 %i, 1
221 %cmp = icmp ne i32 %i, %masked
br i1 %cmp, label %for.body, label %bar.exit

bar.exit:
ret i32 0
}

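; Same as above, but the exit compares the post-increment value against
; %n & 7. When the mask result is 0, the incremented IV could only reach it
; by wrapping, so no useful max can be given (printed as -1).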
228 ; CHECK-LABEL: @ne_max_trip_count_2
229 ; CHECK: Loop %for.body: max backedge-taken count is -1
define i32 @ne_max_trip_count_2(i32 %n) {
entry:
%masked = and i32 %n, 7
br label %for.body

for.body:
236 %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
237 %add = add nsw i32 %i, 1
238 %cmp = icmp ne i32 %add, %masked
br i1 %cmp, label %for.body, label %bar.exit

bar.exit:
ret i32 0
}

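; Here the guard rules out %masked == 0, so the post-increment compare exits
; after at most %masked - 1 == 6 backedges.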
245 ; CHECK-LABEL: @ne_max_trip_count_3
246 ; CHECK: Loop %for.body: max backedge-taken count is 6
define i32 @ne_max_trip_count_3(i32 %n) {
entry:
249 %masked = and i32 %n, 7
250 %guard = icmp eq i32 %masked, 0
br i1 %guard, label %exit, label %for.preheader

for.preheader:
br label %for.body

for.body:
257 %i = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
258 %add = add nsw i32 %i, 1
259 %cmp = icmp ne i32 %add, %masked
br i1 %cmp, label %for.body, label %loop.exit

loop.exit:
br label %exit

exit:
ret i32 0
}

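; The guard only rules out %n == 0, and %n may be as large as UINT_MAX, so
; the max backedge-taken count is %n - 1, printed as -2.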
269 ; CHECK-LABEL: @ne_max_trip_count_4
270 ; CHECK: Loop %for.body: max backedge-taken count is -2
define i32 @ne_max_trip_count_4(i32 %n) {
entry:
273 %guard = icmp eq i32 %n, 0
br i1 %guard, label %exit, label %for.preheader

for.preheader:
br label %for.body

for.body:
280 %i = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
281 %add = add nsw i32 %i, 1
282 %cmp = icmp ne i32 %add, %n
br i1 %cmp, label %for.body, label %loop.exit

loop.exit:
br label %exit

exit:
ret i32 0
}

292 ; The end bound of the loop can change between iterations, so the exact trip
293 ; count is unknown, but SCEV can calculate the max trip count.
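; The IV starts at 0 and is incremented by 1 with nsw until it is no longer
; signed-less-than the reloaded bound, so at most 2^31 - 2 (2147483646)
; backedges can be taken.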
294 define void @changing_end_bound(i32* %n_addr, i32* %addr) {
295 ; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound
296 ; CHECK: Loop %loop: Unpredictable backedge-taken count.
; CHECK: Loop %loop: max backedge-taken count is 2147483646
entry:
br label %loop

loop:
302 %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
303 %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
304 %val = load atomic i32, i32* %addr unordered, align 4
306 %acc.next = add i32 %acc, %val
307 %iv.next = add nsw i32 %iv, 1
308 %n = load atomic i32, i32* %n_addr unordered, align 4
309 %cmp = icmp slt i32 %iv.next, %n
br i1 %cmp, label %loop, label %loop.exit

loop.exit:
ret void
}

; Similar test to the one above, but with an unknown start value.
317 ; Also, there's no nsw on the iv.next, but SCEV knows
318 ; the termination condition is LT, so the IV cannot wrap.
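; With the start value unknown, the distance from %start to the largest
; possible bound can cover the whole i32 range, so only the trivial max of
; -1 is reported.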
319 define void @changing_end_bound2(i32 %start, i32* %n_addr, i32* %addr) {
320 ; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound2
321 ; CHECK: Loop %loop: Unpredictable backedge-taken count.
; CHECK: Loop %loop: max backedge-taken count is -1
entry:
br label %loop

loop:
327 %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
328 %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
329 %val = load atomic i32, i32* %addr unordered, align 4
331 %acc.next = add i32 %acc, %val
332 %iv.next = add i32 %iv, 1
333 %n = load atomic i32, i32* %n_addr unordered, align 4
334 %cmp = icmp slt i32 %iv.next, %n
br i1 %cmp, label %loop, label %loop.exit

loop.exit:
ret void
}

; Changing end bound and a stride greater than one.
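; With a stride of 4 the IV can take at most about 2^32 / 4 distinct steps,
; which is where the 1073741823 bound comes from.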
342 define void @changing_end_bound3(i32 %start, i32* %n_addr, i32* %addr) {
343 ; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound3
344 ; CHECK: Loop %loop: Unpredictable backedge-taken count.
; CHECK: Loop %loop: max backedge-taken count is 1073741823
entry:
br label %loop

loop:
350 %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
351 %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
352 %val = load atomic i32, i32* %addr unordered, align 4
354 %acc.next = add i32 %acc, %val
355 %iv.next = add nsw i32 %iv, 4
356 %n = load atomic i32, i32* %n_addr unordered, align 4
357 %cmp = icmp slt i32 %iv.next, %n
br i1 %cmp, label %loop, label %loop.exit

loop.exit:
ret void
}

; Same as the above test, but the IV can wrap around, so the max
; backedge-taken count is unpredictable.
366 define void @changing_end_bound4(i32 %start, i32* %n_addr, i32* %addr) {
367 ; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound4
368 ; CHECK: Loop %loop: Unpredictable backedge-taken count.
; CHECK: Loop %loop: Unpredictable max backedge-taken count.
entry:
br label %loop

loop:
374 %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
375 %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
376 %val = load atomic i32, i32* %addr unordered, align 4
378 %acc.next = add i32 %acc, %val
379 %iv.next = add i32 %iv, 4
380 %n = load atomic i32, i32* %n_addr unordered, align 4
381 %cmp = icmp slt i32 %iv.next, %n
br i1 %cmp, label %loop, label %loop.exit

loop.exit:
ret void
}

; Unknown stride. Since the stride is not known to be positive, we do not
; estimate the max backedge-taken count.
390 define void @changing_end_bound5(i32 %stride, i32 %start, i32* %n_addr, i32* %addr) {
391 ; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound5
392 ; CHECK: Loop %loop: Unpredictable backedge-taken count.
; CHECK: Loop %loop: Unpredictable max backedge-taken count.
entry:
br label %loop

loop:
398 %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
399 %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
400 %val = load atomic i32, i32* %addr unordered, align 4
402 %acc.next = add i32 %acc, %val
403 %iv.next = add nsw i32 %iv, %stride
404 %n = load atomic i32, i32* %n_addr unordered, align 4
405 %cmp = icmp slt i32 %iv.next, %n
br i1 %cmp, label %loop, label %loop.exit

loop.exit:
ret void
}

; Negative stride value, so neither the exact nor the max backedge-taken
; count can be computed.
413 define void @changing_end_bound6(i32 %start, i32* %n_addr, i32* %addr) {
414 ; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound6
415 ; CHECK: Loop %loop: Unpredictable backedge-taken count.
; CHECK: Loop %loop: Unpredictable max backedge-taken count.
entry:
br label %loop

loop:
421 %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
422 %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
423 %val = load atomic i32, i32* %addr unordered, align 4
425 %acc.next = add i32 %acc, %val
426 %iv.next = add nsw i32 %iv, -1
427 %n = load atomic i32, i32* %n_addr unordered, align 4
428 %cmp = icmp slt i32 %iv.next, %n
br i1 %cmp, label %loop, label %loop.exit

loop.exit:
ret void
}