1 ; This test verifies that the loop vectorizer will not vectorize low trip count
2 ; loops that require runtime checks (trip count is computed from profile info).
4 ; RUN: opt < %s -passes=loop-vectorize -loop-vectorize-with-block-frequency -S | FileCheck %s
6 target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"
8 @tab = common global [32 x i8] zeroinitializer, align 1
10 define i32 @foo_low_trip_count1(i32 %bound) {
11 ; Simple loop with low trip count. Should not be vectorized.
13 ; CHECK-LABEL: @foo_low_trip_count1(
14 ; CHECK-NOT: <{{[0-9]+}} x i8>
19 for.body: ; preds = %for.body, %entry
20 %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
21 %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
22 %0 = load i8, ptr %arrayidx, align 1
23 %cmp1 = icmp eq i8 %0, 0
24 %. = select i1 %cmp1, i8 2, i8 1
25 store i8 %., ptr %arrayidx, align 1
26 %inc = add nsw i32 %i.08, 1
27 %exitcond = icmp eq i32 %i.08, %bound
28 br i1 %exitcond, label %for.end, label %for.body, !prof !1
30 for.end: ; preds = %for.body
34 define i32 @foo_low_trip_count2(i32 %bound) !prof !0 {
35 ; The loop has the same invocation count as the function, but has a low
36 ; trip count per invocation and is not worth vectorizing.
38 ; CHECK-LABEL: @foo_low_trip_count2(
39 ; CHECK-NOT: <{{[0-9]+}} x i8>
44 for.body: ; preds = %for.body, %entry
45 %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
46 %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
47 %0 = load i8, ptr %arrayidx, align 1
48 %cmp1 = icmp eq i8 %0, 0
49 %. = select i1 %cmp1, i8 2, i8 1
50 store i8 %., ptr %arrayidx, align 1
51 %inc = add nsw i32 %i.08, 1
52 %exitcond = icmp eq i32 %i.08, %bound
53 br i1 %exitcond, label %for.end, label %for.body, !prof !1
55 for.end: ; preds = %for.body
59 define i32 @foo_low_trip_count3(i1 %cond, i32 %bound) !prof !0 {
60 ; The loop has a low invocation count compared to the function invocation
61 ; count, but has a high trip count per invocation. Vectorize it.
63 ; CHECK-LABEL: @foo_low_trip_count3(
64 ; CHECK: [[VECTOR_BODY:vector\.body]]:
65 ; CHECK: br i1 [[TMP9:%.*]], label [[MIDDLE_BLOCK:%.*]], label %[[VECTOR_BODY]], !prof [[LP3:\!.*]],
66 ; CHECK: [[FOR_BODY:for\.body]]:
67 ; CHECK: br i1 [[EXITCOND:%.*]], label [[FOR_END_LOOPEXIT:%.*]], label %[[FOR_BODY]], !prof [[LP6:\!.*]],
69 br i1 %cond, label %for.preheader, label %for.end, !prof !2
74 for.body: ; preds = %for.body, %entry
75 %i.08 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
76 %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
77 %0 = load i8, ptr %arrayidx, align 1
78 %cmp1 = icmp eq i8 %0, 0
79 %. = select i1 %cmp1, i8 2, i8 1
80 store i8 %., ptr %arrayidx, align 1
81 %inc = add nsw i32 %i.08, 1
82 %exitcond = icmp eq i32 %i.08, %bound
83 br i1 %exitcond, label %for.end, label %for.body, !prof !3
85 for.end: ; preds = %for.body
89 define i32 @foo_low_trip_count_icmp_sgt(i32 %bound) {
90 ; Simple loop with low trip count and inequality test for exit.
91 ; Should not be vectorized.
93 ; CHECK-LABEL: @foo_low_trip_count_icmp_sgt(
94 ; CHECK-NOT: <{{[0-9]+}} x i8>
99 for.body: ; preds = %for.body, %entry
100 %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
101 %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
102 %0 = load i8, ptr %arrayidx, align 1
103 %cmp1 = icmp eq i8 %0, 0
104 %. = select i1 %cmp1, i8 2, i8 1
105 store i8 %., ptr %arrayidx, align 1
106 %inc = add nsw i32 %i.08, 1
107 %exitcond = icmp sgt i32 %i.08, %bound
108 br i1 %exitcond, label %for.end, label %for.body, !prof !1
110 for.end: ; preds = %for.body
114 define i32 @const_low_trip_count() {
115 ; Simple loop with constant, small trip count and no profiling info.
117 ; CHECK-LABEL: @const_low_trip_count
118 ; CHECK-NOT: <{{[0-9]+}} x i8>
123 for.body: ; preds = %for.body, %entry
124 %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
125 %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
126 %0 = load i8, ptr %arrayidx, align 1
127 %cmp1 = icmp eq i8 %0, 0
128 %. = select i1 %cmp1, i8 2, i8 1
129 store i8 %., ptr %arrayidx, align 1
130 %inc = add nsw i32 %i.08, 1
131 %exitcond = icmp slt i32 %i.08, 2
132 br i1 %exitcond, label %for.body, label %for.end
134 for.end: ; preds = %for.body
138 define i32 @const_large_trip_count() {
139 ; Simple loop with constant large trip count and no profiling info.
141 ; CHECK-LABEL: @const_large_trip_count
142 ; CHECK: <{{[0-9]+}} x i8>
147 for.body: ; preds = %for.body, %entry
148 %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
149 %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
150 %0 = load i8, ptr %arrayidx, align 1
151 %cmp1 = icmp eq i8 %0, 0
152 %. = select i1 %cmp1, i8 2, i8 1
153 store i8 %., ptr %arrayidx, align 1
154 %inc = add nsw i32 %i.08, 1
155 %exitcond = icmp slt i32 %i.08, 1000
156 br i1 %exitcond, label %for.body, label %for.end
158 for.end: ; preds = %for.body
162 define i32 @const_small_trip_count_step() {
163 ; Simple loop with static, small trip count and no profiling info.
165 ; CHECK-LABEL: @const_small_trip_count_step
166 ; CHECK-NOT: <{{[0-9]+}} x i8>
171 for.body: ; preds = %for.body, %entry
172 %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
173 %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
174 %0 = load i8, ptr %arrayidx, align 1
175 %cmp1 = icmp eq i8 %0, 0
176 %. = select i1 %cmp1, i8 2, i8 1
177 store i8 %., ptr %arrayidx, align 1
178 %inc = add nsw i32 %i.08, 5
179 %exitcond = icmp slt i32 %i.08, 10
180 br i1 %exitcond, label %for.body, label %for.end
182 for.end: ; preds = %for.body
186 define i32 @const_trip_over_profile() {
187 ; The constant trip count takes precedence over profile data, so the loop is vectorized.
189 ; CHECK-LABEL: @const_trip_over_profile
190 ; CHECK: <{{[0-9]+}} x i8>
195 for.body: ; preds = %for.body, %entry
196 %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
197 %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
198 %0 = load i8, ptr %arrayidx, align 1
199 %cmp1 = icmp eq i8 %0, 0
200 %. = select i1 %cmp1, i8 2, i8 1
201 store i8 %., ptr %arrayidx, align 1
202 %inc = add nsw i32 %i.08, 1
203 %exitcond = icmp slt i32 %i.08, 1000
204 br i1 %exitcond, label %for.body, label %for.end, !prof !1
206 for.end: ; preds = %for.body
210 ; CHECK: [[LP3]] = !{!"branch_weights", i32 10, i32 2490}
211 ; CHECK: [[LP6]] = !{!"branch_weights", i32 10, i32 0}
212 ; The original loop has latchExitWeight=10 and backedgeTakenWeight=10,000,
213 ; therefore estimatedBackedgeTakenCount=1,000 and estimatedTripCount=1,001.
214 ; Vectorizing by 4 produces estimatedTripCounts of 1,001/4=250 and 1,001%4=1
215 ; for the vectorized and remainder loops, respectively, so their
216 ; estimatedBackedgeTakenCounts are 249 and 0. With a loop invocation weight
217 ; of 10, the recorded weights are the {10, 2490} and {10, 0} checked above.
219 !0 = !{!"function_entry_count", i64 100}
220 !1 = !{!"branch_weights", i32 100, i32 0}
221 !2 = !{!"branch_weights", i32 10, i32 90}
222 !3 = !{!"branch_weights", i32 10, i32 10000}