1 //===-- BlockCoverageInference.cpp - Minimal Execution Coverage -*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Our algorithm works by first identifying a subset of nodes that must always
10 // be instrumented. We call these nodes ambiguous because knowing the coverage
11 // of all remaining nodes is not enough to infer their coverage status.
13 // In general a node v is ambiguous if there exists two entry-to-terminal paths
14 // P_1 and P_2 such that:
15 // 1. v not in P_1 but P_1 visits a predecessor of v, and
16 // 2. v not in P_2 but P_2 visits a successor of v.
18 // If a node v is not ambiguous, then if condition 1 fails, we can infer v’s
19 // coverage from the coverage of its predecessors, or if condition 2 fails, we
20 // can infer v’s coverage from the coverage of its successors.
22 // Sadly, there are example CFGs where it is not possible to infer all nodes
23 // from the ambiguous nodes alone. Our algorithm selects a minimum number of
24 // extra nodes to add to the ambiguous nodes to form a valid instrumentation S.
26 // Details on this algorithm can be found in https://arxiv.org/abs/2208.13907
28 //===----------------------------------------------------------------------===//
30 #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
31 #include "llvm/ADT/DepthFirstIterator.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/Support/CRC.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/GraphWriter.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
41 #define DEBUG_TYPE "pgo-block-coverage"
43 STATISTIC(NumFunctions
, "Number of total functions that BCI has processed");
44 STATISTIC(NumIneligibleFunctions
,
45 "Number of functions for which BCI cannot run on");
46 STATISTIC(NumBlocks
, "Number of total basic blocks that BCI has processed");
47 STATISTIC(NumInstrumentedBlocks
,
48 "Number of basic blocks instrumented for coverage");
50 BlockCoverageInference::BlockCoverageInference(const Function
&F
,
51 bool ForceInstrumentEntry
)
52 : F(F
), ForceInstrumentEntry(ForceInstrumentEntry
) {
54 assert(!ForceInstrumentEntry
|| shouldInstrumentBlock(F
.getEntryBlock()));
59 if (shouldInstrumentBlock(BB
))
60 ++NumInstrumentedBlocks
;
64 BlockCoverageInference::BlockSet
65 BlockCoverageInference::getDependencies(const BasicBlock
&BB
) const {
66 assert(BB
.getParent() == &F
);
67 BlockSet Dependencies
;
68 auto It
= PredecessorDependencies
.find(&BB
);
69 if (It
!= PredecessorDependencies
.end())
70 Dependencies
.set_union(It
->second
);
71 It
= SuccessorDependencies
.find(&BB
);
72 if (It
!= SuccessorDependencies
.end())
73 Dependencies
.set_union(It
->second
);
77 uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const {
81 if (shouldInstrumentBlock(BB
)) {
83 support::endian::write64le(Data
, Index
);
91 bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock
&BB
) const {
92 assert(BB
.getParent() == &F
);
93 auto It
= PredecessorDependencies
.find(&BB
);
94 if (It
!= PredecessorDependencies
.end() && It
->second
.size())
96 It
= SuccessorDependencies
.find(&BB
);
97 if (It
!= SuccessorDependencies
.end() && It
->second
.size())
102 void BlockCoverageInference::findDependencies() {
103 assert(PredecessorDependencies
.empty() && SuccessorDependencies
.empty());
104 // Empirical analysis shows that this algorithm finishes within 5 seconds for
105 // functions with fewer than 1.5K blocks.
106 if (F
.hasFnAttribute(Attribute::NoReturn
) || F
.size() > 1500) {
107 ++NumIneligibleFunctions
;
111 SmallVector
<const BasicBlock
*, 4> TerminalBlocks
;
114 TerminalBlocks
.push_back(&BB
);
116 // Traverse the CFG backwards from the terminal blocks to make sure every
117 // block can reach some terminal block. Otherwise this algorithm will not work
118 // and we must fall back to instrumenting every block.
119 df_iterator_default_set
<const BasicBlock
*> Visited
;
120 for (auto *BB
: TerminalBlocks
)
121 for (auto *N
: inverse_depth_first_ext(BB
, Visited
))
123 if (F
.size() != Visited
.size()) {
124 ++NumIneligibleFunctions
;
128 // The current implementation for computing `PredecessorDependencies` and
129 // `SuccessorDependencies` runs in quadratic time with respect to the number
130 // of basic blocks. While we do have a more complicated linear time algorithm
131 // in https://arxiv.org/abs/2208.13907 we do not know if it will give a
132 // significant speedup in practice given that most functions tend to be
133 // relatively small in size for intended use cases.
134 auto &EntryBlock
= F
.getEntryBlock();
136 // The set of blocks that are reachable while avoiding BB.
137 BlockSet ReachableFromEntry
, ReachableFromTerminal
;
138 getReachableAvoiding(EntryBlock
, BB
, /*IsForward=*/true,
140 for (auto *TerminalBlock
: TerminalBlocks
)
141 getReachableAvoiding(*TerminalBlock
, BB
, /*IsForward=*/false,
142 ReachableFromTerminal
);
144 auto Preds
= predecessors(&BB
);
145 bool HasSuperReachablePred
= llvm::any_of(Preds
, [&](auto *Pred
) {
146 return ReachableFromEntry
.count(Pred
) &&
147 ReachableFromTerminal
.count(Pred
);
149 if (!HasSuperReachablePred
)
150 for (auto *Pred
: Preds
)
151 if (ReachableFromEntry
.count(Pred
))
152 PredecessorDependencies
[&BB
].insert(Pred
);
154 auto Succs
= successors(&BB
);
155 bool HasSuperReachableSucc
= llvm::any_of(Succs
, [&](auto *Succ
) {
156 return ReachableFromEntry
.count(Succ
) &&
157 ReachableFromTerminal
.count(Succ
);
159 if (!HasSuperReachableSucc
)
160 for (auto *Succ
: Succs
)
161 if (ReachableFromTerminal
.count(Succ
))
162 SuccessorDependencies
[&BB
].insert(Succ
);
165 if (ForceInstrumentEntry
) {
166 // Force the entry block to be instrumented by clearing the blocks it can
167 // infer coverage from.
168 PredecessorDependencies
[&EntryBlock
].clear();
169 SuccessorDependencies
[&EntryBlock
].clear();
172 // Construct a graph where blocks are connected if there is a mutual
173 // dependency between them. This graph has a special property that it contains
175 DenseMap
<const BasicBlock
*, BlockSet
> AdjacencyList
;
177 for (auto *Succ
: successors(&BB
)) {
178 if (SuccessorDependencies
[&BB
].count(Succ
) &&
179 PredecessorDependencies
[Succ
].count(&BB
)) {
180 AdjacencyList
[&BB
].insert(Succ
);
181 AdjacencyList
[Succ
].insert(&BB
);
186 // Given a path with at least one node, return the next node on the path.
187 auto getNextOnPath
= [&](BlockSet
&Path
) -> const BasicBlock
* {
189 auto &Neighbors
= AdjacencyList
[Path
.back()];
190 if (Path
.size() == 1) {
191 // This is the first node on the path, return its neighbor.
192 assert(Neighbors
.size() == 1);
193 return Neighbors
.front();
194 } else if (Neighbors
.size() == 2) {
195 // This is the middle of the path, find the neighbor that is not on the
197 assert(Path
.size() >= 2);
198 return Path
.count(Neighbors
[0]) ? Neighbors
[1] : Neighbors
[0];
200 // This is the end of the path.
201 assert(Neighbors
.size() == 1);
205 // Remove all cycles in the inferencing graph.
207 if (AdjacencyList
[&BB
].size() == 1) {
208 // We found the head of some path.
211 while (const BasicBlock
*Next
= getNextOnPath(Path
))
213 LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path
) << "\n");
215 // Remove these nodes from the graph so we don't discover this path again.
216 for (auto *BB
: Path
)
217 AdjacencyList
[BB
].clear();
219 // Finally, remove the cycles.
220 if (PredecessorDependencies
[Path
.front()].size()) {
221 for (auto *BB
: Path
)
222 if (BB
!= Path
.back())
223 SuccessorDependencies
[BB
].clear();
225 for (auto *BB
: Path
)
226 if (BB
!= Path
.front())
227 PredecessorDependencies
[BB
].clear();
231 LLVM_DEBUG(dump(dbgs()));
234 void BlockCoverageInference::getReachableAvoiding(const BasicBlock
&Start
,
235 const BasicBlock
&Avoid
,
237 BlockSet
&Reachable
) const {
238 df_iterator_default_set
<const BasicBlock
*> Visited
;
239 Visited
.insert(&Avoid
);
241 auto Range
= depth_first_ext(&Start
, Visited
);
242 Reachable
.insert(Range
.begin(), Range
.end());
244 auto Range
= inverse_depth_first_ext(&Start
, Visited
);
245 Reachable
.insert(Range
.begin(), Range
.end());
250 class DotFuncBCIInfo
{
252 const BlockCoverageInference
*BCI
;
253 const DenseMap
<const BasicBlock
*, bool> *Coverage
;
256 DotFuncBCIInfo(const BlockCoverageInference
*BCI
,
257 const DenseMap
<const BasicBlock
*, bool> *Coverage
)
258 : BCI(BCI
), Coverage(Coverage
) {}
260 const Function
&getFunction() { return BCI
->F
; }
262 bool isInstrumented(const BasicBlock
*BB
) const {
263 return BCI
->shouldInstrumentBlock(*BB
);
266 bool isCovered(const BasicBlock
*BB
) const {
267 return Coverage
&& Coverage
->lookup(BB
);
270 bool isDependent(const BasicBlock
*Src
, const BasicBlock
*Dest
) const {
271 return BCI
->getDependencies(*Src
).count(Dest
);
276 struct GraphTraits
<DotFuncBCIInfo
*> : public GraphTraits
<const BasicBlock
*> {
277 static NodeRef
getEntryNode(DotFuncBCIInfo
*Info
) {
278 return &(Info
->getFunction().getEntryBlock());
281 // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
282 using nodes_iterator
= pointer_iterator
<Function::const_iterator
>;
284 static nodes_iterator
nodes_begin(DotFuncBCIInfo
*Info
) {
285 return nodes_iterator(Info
->getFunction().begin());
288 static nodes_iterator
nodes_end(DotFuncBCIInfo
*Info
) {
289 return nodes_iterator(Info
->getFunction().end());
292 static size_t size(DotFuncBCIInfo
*Info
) {
293 return Info
->getFunction().size();
298 struct DOTGraphTraits
<DotFuncBCIInfo
*> : public DefaultDOTGraphTraits
{
300 DOTGraphTraits(bool IsSimple
= false) : DefaultDOTGraphTraits(IsSimple
) {}
302 static std::string
getGraphName(DotFuncBCIInfo
*Info
) {
303 return "BCI CFG for " + Info
->getFunction().getName().str();
306 std::string
getNodeLabel(const BasicBlock
*Node
, DotFuncBCIInfo
*Info
) {
307 return Node
->getName().str();
310 std::string
getEdgeAttributes(const BasicBlock
*Src
, const_succ_iterator I
,
311 DotFuncBCIInfo
*Info
) {
312 const BasicBlock
*Dest
= *I
;
313 if (Info
->isDependent(Src
, Dest
))
315 if (Info
->isDependent(Dest
, Src
))
320 std::string
getNodeAttributes(const BasicBlock
*Node
, DotFuncBCIInfo
*Info
) {
322 if (Info
->isInstrumented(Node
))
323 Result
+= "style=filled,fillcolor=gray";
324 if (Info
->isCovered(Node
))
325 Result
+= std::string(Result
.empty() ? "" : ",") + "color=red";
332 void BlockCoverageInference::viewBlockCoverageGraph(
333 const DenseMap
<const BasicBlock
*, bool> *Coverage
) const {
334 DotFuncBCIInfo
Info(this, Coverage
);
335 WriteGraph(&Info
, "BCI", false,
336 "Block Coverage Inference for " + F
.getName());
339 void BlockCoverageInference::dump(raw_ostream
&OS
) const {
340 OS
<< "Minimal block coverage for function \'" << F
.getName()
341 << "\' (Instrumented=*)\n";
343 OS
<< (shouldInstrumentBlock(BB
) ? "* " : " ") << BB
.getName() << "\n";
344 auto It
= PredecessorDependencies
.find(&BB
);
345 if (It
!= PredecessorDependencies
.end() && It
->second
.size())
346 OS
<< " PredDeps = " << getBlockNames(It
->second
) << "\n";
347 It
= SuccessorDependencies
.find(&BB
);
348 if (It
!= SuccessorDependencies
.end() && It
->second
.size())
349 OS
<< " SuccDeps = " << getBlockNames(It
->second
) << "\n";
351 OS
<< " Instrumented Blocks Hash = 0x"
352 << Twine::utohexstr(getInstrumentedBlocksHash()) << "\n";
356 BlockCoverageInference::getBlockNames(ArrayRef
<const BasicBlock
*> BBs
) {
358 raw_string_ostream
OS(Result
);
361 OS
<< BBs
.front()->getName();
362 BBs
= BBs
.drop_front();
365 OS
<< ", " << BB
->getName();