1 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-correlation-irreducible-loops.prof -sample-profile-use-profi=0 | opt -passes='print<block-freq>' -disable-output -use-iterative-bfi-inference 2>&1 | FileCheck %s
2 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-correlation-irreducible-loops.prof -sample-profile-use-profi=0 -S | FileCheck %s --check-prefix=CHECK2
3 ; RUN: opt < %s -passes='print<block-freq>' -use-iterative-bfi-inference -disable-output 2>&1 | FileCheck %s --check-prefix=CHECK3
5 ; The C code for this test case is from c-parse.c in 403.gcc (SPEC2006)
6 ; The problem with BFI for the test is solved by applying iterative inference.
7 ; The corresponding CFG graph is shown below, with intended counts for every
8 ; basic block. The hot loop, b3->b4->b2, is not getting proper (large) counts
9 ; unless the -use-iterative-bfi-inference option is specified.
11 ; +-------------------------------------------+
20 ; | +------------> | b2 [625] | -+ |
21 ; | | +----------+ | |
25 ; | +----------+ +----------+ | |
26 ; | | b4 [624] | <-- | b3 [625] | <+---------+
27 ; | +----------+ +----------+ |
31 ; +----------+ +--------------------+
32 ; | b8 [1] | <-- | b7 [2] |
33 ; +----------+ +--------------------+
37 ; +----------+ +----------+ |
38 ; | b9 [1] | <-- | b5 [2] | |
39 ; +----------+ +----------+ |
; Global i32 that both test functions load; the loaded value feeds their
; branch and switch conditions.
47 @yydebug = dso_local global i32 0, align 4
; yyparse_1 is exercised by the first two RUN lines, which apply the
; sample-profile pass with -sample-profile-use-profi=0. Every pseudoprobe call
; below carries the GUID listed for "yyparse_1" in !llvm.pseudo_probe_desc, and
; its second operand runs from 1 to 9. The default-prefix check lines pin the
; per-block counts printed by print<block-freq>; the CHECK2 lines pin the !prof
; attached to two of the conditional branches in the -S output.
49 ; Function Attrs: noinline nounwind uwtable
50 define dso_local i32 @yyparse_1() #0 {
52 call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 1, i32 0, i64 -1)
53 %0 = load i32, ptr @yydebug, align 4
; %cmp (yydebug != 0) is the condition of every conditional branch shown below.
54 %cmp = icmp ne i32 %0, 0
56 ; CHECK: - b1: float = {{.*}}, int = {{.*}}, count = 1
59 call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 2, i32 0, i64 -1)
60 br i1 %cmp, label %b7, label %b3
61 ; CHECK: - b2: float = {{.*}}, int = {{.*}}, count = 586
64 call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 3, i32 0, i64 -1)
65 br i1 %cmp, label %b7, label %b4
66 ; CHECK: - b3: float = {{.*}}, int = {{.*}}, count = 586
; END172_PROF, captured here, is resolved at the end of the file to
; branch weights 1 and 1003.
67 ; CHECK2: br i1 %cmp, label %b7, label %b4,
68 ; CHECK2-SAME: !prof ![[END172_PROF:[0-9]+]]
71 call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 4, i32 0, i64 -1)
73 ; CHECK: - b4: float = {{.*}}, int = {{.*}}, count = 585
76 call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 5, i32 0, i64 -1)
77 br i1 %cmp, label %b9, label %b6
78 ; CHECK: - b5: float = {{.*}}, int = {{.*}}, count = 2
81 call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 6, i32 0, i64 -1)
83 ; CHECK: - b6: float = {{.*}}, int = {{.*}}, count = 1
86 call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 7, i32 0, i64 -1)
87 br i1 %cmp, label %b5, label %b8
88 ; CHECK: - b7: float = {{.*}}, int = {{.*}}, count = 2
; FALSE4858_PROF, captured here, is resolved at the end of the file to
; branch weights 2 and 1.
89 ; CHECK2: br i1 %cmp, label %b5, label %b8,
90 ; CHECK2-SAME: !prof ![[FALSE4858_PROF:[0-9]+]]
93 call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 8, i32 0, i64 -1)
95 ; CHECK: - b8: float = {{.*}}, int = {{.*}}, count = 1
98 call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 9, i32 0, i64 -1)
99 %1 = load i32, ptr @yydebug, align 4
101 ; CHECK: - b9: float = {{.*}}, int = {{.*}}, count = 1
105 ; Another difficult (for BFI) instance with irreducible loops,
106 ; containing 'indirectbr'. The corresponding CFG graph is shown below, with
107 ; intended counts for every basic block.
115 ; +------------------------+
117 ; | +------------------------+ |
121 ; | +-----------+ | |
122 ; | | b3 [8212] | <+-------+ |
123 ; | +-----------+ | | |
127 ; | +------------------------+ |
128 ; | | indirectgoto [17747] | -+
129 ; | +------------------------+
; foo1 is checked by the third RUN line (prefix CHECK3), which runs only
; print<block-freq> with -use-iterative-bfi-inference and no sample-profile
; pass. The expected counts are therefore presumably derived from the !prof
; metadata already present in the IR: the function entry count (!132) and the
; branch weights on the terminators below.
137 ; Function Attrs: nounwind uwtable
138 define dso_local i32 @foo1() #0 !prof !132 {
140 call void @llvm.pseudoprobe(i64 7682762345278052905, i64 1, i32 0, i64 -1)
141 %0 = load i32, ptr @yydebug, align 4
142 %cmp = icmp ne i32 %0, 0
144 ; CHECK3: - b1: float = {{.*}}, int = {{.*}}, count = 1
147 call void @llvm.pseudoprobe(i64 7682762345278052905, i64 2, i32 0, i64 -1)
148 %1 = load i32, ptr @yydebug, align 4
; Switch on the loaded yydebug value: the default destination is %b4 and
; case 1 goes to %indirectgoto.
149 switch i32 %1, label %b4 [
150 i32 1, label %indirectgoto
153 ; CHECK3: - b2: float = {{.*}}, int = {{.*}}, count = 86
156 call void @llvm.pseudoprobe(i64 7682762345278052905, i64 3, i32 0, i64 -1)
157 br label %indirectgoto
158 ; CHECK3: - b3: float = {{.*}}, int = {{.*}}, count = 8212
161 call void @llvm.pseudoprobe(i64 7682762345278052905, i64 4, i32 0, i64 -1)
162 %2 = load i32, ptr @yydebug, align 4
164 ; CHECK3: - b4: float = {{.*}}, int = {{.*}}, count = 1
; The indirectbr takes the alloca'd pointer itself as its address operand.
167 %indirect.goto.dest = alloca i8, align 4
168 call void @llvm.pseudoprobe(i64 7682762345278052905, i64 5, i32 0, i64 -1)
; Weights in !134, in destination order: b2 = 85, indirectgoto = 9449, b4 = 1,
; b3 = 8212. They sum to 17747, the count expected for indirectgoto, and the
; b3 weight equals the count expected for b3.
169 indirectbr ptr %indirect.goto.dest, [label %b2, label %indirectgoto, label %b4, label %b3], !prof !134
170 ; CHECK3: - indirectgoto: float = {{.*}}, int = {{.*}}, count = 17747
; Declaration of the pseudo-probe intrinsic called in both test functions.
174 declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1

; #0 is the attribute group used by both @yyparse_1 and @foo1; it includes
; "use-sample-profile". #1 is used by the intrinsic declaration.
176 attributes #0 = { noinline nounwind uwtable "use-sample-profile"}
177 attributes #1 = { nounwind }

; Pseudo-probe descriptors, one per function. The first field of each entry
; equals the first operand of the pseudoprobe calls in the named function.
; The second i64 field is presumably the function's CFG hash - TODO confirm.
179 !llvm.pseudo_probe_desc = !{!1079, !4496}
180 !1079 = !{i64 -7702751003264189226, i64 158496288380146391, !"yyparse_1", null}
181 !4496 = !{i64 7682762345278052905, i64 404850113186107133, !"foo1", null}
; !132 is the !prof attached to the definition of @foo1 (entry count 1).
182 !132 = !{!"function_entry_count", i64 1}
; !133 holds three branch weights; presumably it belongs to the switch in
; @foo1 - TODO confirm against its user.
183 !133 = !{!"branch_weights", i32 0, i32 86, i32 0}
; !134 is attached to the indirectbr in @foo1; it has one weight per listed
; destination.
184 !134 = !{!"branch_weights", i32 85, i32 9449, i32 1, i32 8212}
186 ; CHECK2: ![[END172_PROF]] = !{!"branch_weights", i32 1, i32 1003}
187 ; CHECK2: ![[FALSE4858_PROF]] = !{!"branch_weights", i32 2, i32 1}