1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -S -loop-predication -loop-predication-skip-profitability-checks=false < %s 2>&1 | FileCheck %s
3 ; RUN: opt -S -loop-predication-skip-profitability-checks=false -passes='require<scalar-evolution>,require<branch-prob>,loop(loop-predication)' < %s 2>&1 | FileCheck %s
5 ; The latch block exits to a speculation block. BPI already knows (without
6 ; profile data) that the deopt exit is very rarely taken, so we do not
7 ; predicate this loop using that coarse latch check.
8 ; LatchExitProbability: 0x04000000 / 0x80000000 = 3.12%
9 ; ExitingBlockProbability: 0x7ffa572a / 0x80000000 = 99.98%
10 define i64 @donot_predicate(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) { ; per the CHECK lines below, the guard is expected to keep its per-iteration condition
11 ; CHECK-LABEL: @donot_predicate(
13 ; CHECK-NEXT: [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
14 ; CHECK-NEXT: [[N_PRE:%.*]] = load i64, i64* [[N_ADDR:%.*]], align 4
15 ; CHECK-NEXT: br label [[HEADER:%.*]]
17 ; CHECK-NEXT: [[RESULT_IN3:%.*]] = phi i64* [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
18 ; CHECK-NEXT: [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
19 ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
20 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
21 ; CHECK-NEXT: [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
22 ; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
23 ; CHECK-NEXT: br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof !0
25 ; CHECK-NEXT: [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
26 ; CHECK-NEXT: br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[DEOPT:%.*]]
28 ; CHECK-NEXT: [[COUNTED_SPECULATION_FAILED:%.*]] = call i64 (...) @llvm.experimental.deoptimize.i64(i64 30) [ "deopt"(i32 0) ]
29 ; CHECK-NEXT: ret i64 [[COUNTED_SPECULATION_FAILED]]
31 ; CHECK-NEXT: [[RESULT_IN3_LCSSA:%.*]] = phi i64* [ [[RESULT_IN3]], [[HEADER]] ]
32 ; CHECK-NEXT: [[RESULT_LE:%.*]] = load i64, i64* [[RESULT_IN3_LCSSA]], align 8
33 ; CHECK-NEXT: ret i64 [[RESULT_LE]]
36 %length.ext = zext i32 %length to i64 ; widen the i32 length so it can be compared with the i64 induction variable
37 %n.pre = load i64, i64* %n_addr, align 4 ; loaded once before the loop; compared against %j2 on every iteration
40 Header: ; preds = %entry, %Latch
41 %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
42 %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
43 %within.bounds = icmp ult i64 %j2, %length.ext ; unsigned range check of the induction variable against the extended length
44 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] ; guard on the range check above
45 %innercmp = icmp eq i64 %j2, %n.pre
46 %j.next = add nuw nsw i64 %j2, 1
47 br i1 %innercmp, label %Latch, label %exit, !prof !0 ; header exit; !0 gives weight 18 to %Latch and 104200 to %exit
49 Latch: ; preds = %Header
50 %speculate_trip_count = icmp ult i64 %j.next, 1048576 ; latch check against the constant bound 1048576
51 br i1 %speculate_trip_count, label %Header, label %deopt ; no !prof here; leaving via the latch goes to the deopt block
53 deopt: ; preds = %Latch
54 %counted_speculation_failed = call i64 (...) @llvm.experimental.deoptimize.i64(i64 30) [ "deopt"(i32 0) ]
55 ret i64 %counted_speculation_failed
57 exit: ; preds = %Header
58 %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
59 %result.le = load i64, i64* %result.in3.lcssa, align 8
62 !0 = !{!"branch_weights", i32 18, i32 104200} ; used by the Header branch of @donot_predicate: weight 18 to %Latch, 104200 to %exit
64 ; Predicate the loop, since there is no profile information and BPI concluded
65 ; that all exiting blocks have the same probability of exiting the loop.
66 define i64 @predicate(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) { ; per the CHECK lines below, the guard condition is expected to become the loop-invariant [[TMP2]]
67 ; CHECK-LABEL: @predicate(
69 ; CHECK-NEXT: [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
70 ; CHECK-NEXT: [[N_PRE:%.*]] = load i64, i64* [[N_ADDR:%.*]], align 4
71 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i64 1048576, [[LENGTH_EXT]]
72 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 0, [[LENGTH_EXT]]
73 ; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
74 ; CHECK-NEXT: br label [[HEADER:%.*]]
76 ; CHECK-NEXT: [[RESULT_IN3:%.*]] = phi i64* [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
77 ; CHECK-NEXT: [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
78 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ]
79 ; CHECK-NEXT: [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
80 ; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
81 ; CHECK-NEXT: br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]]
83 ; CHECK-NEXT: [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
84 ; CHECK-NEXT: br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]]
86 ; CHECK-NEXT: ret i64 1
88 ; CHECK-NEXT: [[RESULT_IN3_LCSSA:%.*]] = phi i64* [ [[RESULT_IN3]], [[HEADER]] ]
89 ; CHECK-NEXT: [[RESULT_LE:%.*]] = load i64, i64* [[RESULT_IN3_LCSSA]], align 8
90 ; CHECK-NEXT: ret i64 [[RESULT_LE]]
93 %length.ext = zext i32 %length to i64 ; widen the i32 length so it can be compared with the i64 induction variable
94 %n.pre = load i64, i64* %n_addr, align 4 ; loaded once before the loop; compared against %j2 on every iteration
97 Header: ; preds = %entry, %Latch
98 %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
99 %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
100 %within.bounds = icmp ult i64 %j2, %length.ext ; unsigned range check of the induction variable against the extended length
101 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] ; guard on the range check above; its condition is what the CHECK lines expect to be replaced
102 %innercmp = icmp eq i64 %j2, %n.pre
103 %j.next = add nuw nsw i64 %j2, 1
104 br i1 %innercmp, label %Latch, label %exit ; header exit; no !prof metadata in this function
106 Latch: ; preds = %Header
107 %speculate_trip_count = icmp ult i64 %j.next, 1048576 ; latch check against the constant bound 1048576
108 br i1 %speculate_trip_count, label %Header, label %exitLatch
110 exitLatch: ; preds = %Latch
113 exit: ; preds = %Header
114 %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
115 %result.le = load i64, i64* %result.in3.lcssa, align 8
119 ; Same as the test above, but with profile data showing that the most probable
120 ; exit from the loop is the header exiting block (not the latch block), so do not predicate.
121 ; LatchExitProbability: 0x000020e1 / 0x80000000 = 0.00%
122 ; ExitingBlockProbability: 0x7ffcbb86 / 0x80000000 = 99.99%
123 define i64 @donot_predicate_prof(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) { ; per the CHECK lines below, the guard is expected to keep its per-iteration condition
124 ; CHECK-LABEL: @donot_predicate_prof(
126 ; CHECK-NEXT: [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
127 ; CHECK-NEXT: [[N_PRE:%.*]] = load i64, i64* [[N_ADDR:%.*]], align 4
128 ; CHECK-NEXT: br label [[HEADER:%.*]]
130 ; CHECK-NEXT: [[RESULT_IN3:%.*]] = phi i64* [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
131 ; CHECK-NEXT: [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
132 ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
133 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
134 ; CHECK-NEXT: [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
135 ; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
136 ; CHECK-NEXT: br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof !1
138 ; CHECK-NEXT: [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
139 ; CHECK-NEXT: br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof !2
141 ; CHECK-NEXT: ret i64 1
143 ; CHECK-NEXT: [[RESULT_IN3_LCSSA:%.*]] = phi i64* [ [[RESULT_IN3]], [[HEADER]] ]
144 ; CHECK-NEXT: [[RESULT_LE:%.*]] = load i64, i64* [[RESULT_IN3_LCSSA]], align 8
145 ; CHECK-NEXT: ret i64 [[RESULT_LE]]
148 %length.ext = zext i32 %length to i64 ; widen the i32 length so it can be compared with the i64 induction variable
149 %n.pre = load i64, i64* %n_addr, align 4 ; loaded once before the loop; compared against %j2 on every iteration
152 Header: ; preds = %entry, %Latch
153 %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
154 %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
155 %within.bounds = icmp ult i64 %j2, %length.ext ; unsigned range check of the induction variable against the extended length
156 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] ; guard on the range check above
157 %innercmp = icmp eq i64 %j2, %n.pre
158 %j.next = add nuw nsw i64 %j2, 1
159 br i1 %innercmp, label %Latch, label %exit, !prof !1 ; header exit; !1 gives weight 104 to %Latch and 1042861 to %exit
161 Latch: ; preds = %Header
162 %speculate_trip_count = icmp ult i64 %j.next, 1048576 ; latch check against the constant bound 1048576
163 br i1 %speculate_trip_count, label %Header, label %exitLatch, !prof !2 ; !2 gives weight 255129 to %Header and 1 to %exitLatch
165 exitLatch: ; preds = %Latch
168 exit: ; preds = %Header
169 %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
170 %result.le = load i64, i64* %result.in3.lcssa, align 8
173 declare i64 @llvm.experimental.deoptimize.i64(...) ; called from the deopt block of @donot_predicate
174 declare void @llvm.experimental.guard(i1, ...) ; called in the Header block of each test function
176 !1 = !{!"branch_weights", i32 104, i32 1042861} ; used by the Header branch of @donot_predicate_prof: weight 104 to %Latch, 1042861 to %exit
177 !2 = !{!"branch_weights", i32 255129, i32 1} ; used by the Latch branch of @donot_predicate_prof: weight 255129 to %Header, 1 to %exitLatch