1 //===- TypeErasedDataflowAnalysis.cpp -------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines type-erased base types and functions for building dataflow
10 // analyses that run over Control-Flow Graphs (CFGs).
12 //===----------------------------------------------------------------------===//
16 #include <system_error>
20 #include "clang/AST/ASTDumper.h"
21 #include "clang/AST/DeclCXX.h"
22 #include "clang/AST/OperationKinds.h"
23 #include "clang/AST/StmtCXX.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Analysis/Analyses/PostOrderCFGView.h"
26 #include "clang/Analysis/CFG.h"
27 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
28 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
29 #include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
30 #include "clang/Analysis/FlowSensitive/RecordOps.h"
31 #include "clang/Analysis/FlowSensitive/Transfer.h"
32 #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
33 #include "clang/Analysis/FlowSensitive/Value.h"
34 #include "llvm/ADT/ArrayRef.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallBitVector.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/Error.h"
40 #define DEBUG_TYPE "clang-dataflow"
45 /// Returns the index of `Block` in the successors of `Pred`.
46 static int blockIndexInPredecessor(const CFGBlock
&Pred
,
47 const CFGBlock
&Block
) {
48 auto BlockPos
= llvm::find_if(
49 Pred
.succs(), [&Block
](const CFGBlock::AdjacentBlock
&Succ
) {
50 return Succ
&& Succ
->getBlockID() == Block
.getBlockID();
52 return BlockPos
- Pred
.succ_begin();
55 // A "backedge" node is a block introduced in the CFG exclusively to indicate a
56 // loop backedge. They are exactly identified by the presence of a non-null
57 // pointer to the entry block of the loop condition. Note that this is not
58 // necessarily the block with the loop statement as terminator, because
59 // short-circuit operators will result in multiple blocks encoding the loop
60 // condition, only one of which will contain the loop statement as terminator.
61 static bool isBackedgeNode(const CFGBlock
&B
) {
62 return B
.getLoopTarget() != nullptr;
67 // The return type of the visit functions in TerminatorVisitor. The first
68 // element represents the terminator expression (that is the conditional
69 // expression in case of a path split in the CFG). The second element
70 // represents whether the condition was true or false.
71 using TerminatorVisitorRetTy
= std::pair
<const Expr
*, bool>;
73 /// Extends the flow condition of an environment based on a terminator
75 class TerminatorVisitor
76 : public ConstStmtVisitor
<TerminatorVisitor
, TerminatorVisitorRetTy
> {
78 TerminatorVisitor(const StmtToEnvMap
&StmtToEnv
, Environment
&Env
,
80 : StmtToEnv(StmtToEnv
), Env(Env
), BlockSuccIdx(BlockSuccIdx
) {}
82 TerminatorVisitorRetTy
VisitIfStmt(const IfStmt
*S
) {
83 auto *Cond
= S
->getCond();
84 assert(Cond
!= nullptr);
85 return extendFlowCondition(*Cond
);
88 TerminatorVisitorRetTy
VisitWhileStmt(const WhileStmt
*S
) {
89 auto *Cond
= S
->getCond();
90 assert(Cond
!= nullptr);
91 return extendFlowCondition(*Cond
);
94 TerminatorVisitorRetTy
VisitDoStmt(const DoStmt
*S
) {
95 auto *Cond
= S
->getCond();
96 assert(Cond
!= nullptr);
97 return extendFlowCondition(*Cond
);
100 TerminatorVisitorRetTy
VisitForStmt(const ForStmt
*S
) {
101 auto *Cond
= S
->getCond();
103 return extendFlowCondition(*Cond
);
104 return {nullptr, false};
107 TerminatorVisitorRetTy
VisitCXXForRangeStmt(const CXXForRangeStmt
*) {
108 // Don't do anything special for CXXForRangeStmt, because the condition
109 // (being implicitly generated) isn't visible from the loop body.
110 return {nullptr, false};
113 TerminatorVisitorRetTy
VisitBinaryOperator(const BinaryOperator
*S
) {
114 assert(S
->getOpcode() == BO_LAnd
|| S
->getOpcode() == BO_LOr
);
115 auto *LHS
= S
->getLHS();
116 assert(LHS
!= nullptr);
117 return extendFlowCondition(*LHS
);
120 TerminatorVisitorRetTy
121 VisitConditionalOperator(const ConditionalOperator
*S
) {
122 auto *Cond
= S
->getCond();
123 assert(Cond
!= nullptr);
124 return extendFlowCondition(*Cond
);
128 TerminatorVisitorRetTy
extendFlowCondition(const Expr
&Cond
) {
129 // The terminator sub-expression might not be evaluated.
130 if (Env
.getValue(Cond
) == nullptr)
131 transfer(StmtToEnv
, Cond
, Env
);
133 auto *Val
= cast_or_null
<BoolValue
>(Env
.getValue(Cond
));
134 // Value merging depends on flow conditions from different environments
135 // being mutually exclusive -- that is, they cannot both be true in their
136 // entirety (even if they may share some clauses). So, we need *some* value
137 // for the condition expression, even if just an atom.
138 if (Val
== nullptr) {
139 Val
= &Env
.makeAtomicBoolValue();
140 Env
.setValue(Cond
, *Val
);
143 bool ConditionValue
= true;
144 // The condition must be inverted for the successor that encompasses the
145 // "else" branch, if such exists.
146 if (BlockSuccIdx
== 1) {
147 Val
= &Env
.makeNot(*Val
);
148 ConditionValue
= false;
151 Env
.assume(Val
->formula());
152 return {&Cond
, ConditionValue
};
155 const StmtToEnvMap
&StmtToEnv
;
160 /// Holds data structures required for running dataflow analysis.
161 struct AnalysisContext
{
162 AnalysisContext(const ControlFlowContext
&CFCtx
,
163 TypeErasedDataflowAnalysis
&Analysis
,
164 const Environment
&InitEnv
,
165 llvm::ArrayRef
<std::optional
<TypeErasedDataflowAnalysisState
>>
167 : CFCtx(CFCtx
), Analysis(Analysis
), InitEnv(InitEnv
),
168 Log(*InitEnv
.getDataflowAnalysisContext().getOptions().Log
),
169 BlockStates(BlockStates
) {
170 Log
.beginAnalysis(CFCtx
, Analysis
);
172 ~AnalysisContext() { Log
.endAnalysis(); }
174 /// Contains the CFG being analyzed.
175 const ControlFlowContext
&CFCtx
;
176 /// The analysis to be run.
177 TypeErasedDataflowAnalysis
&Analysis
;
178 /// Initial state to start the analysis.
179 const Environment
&InitEnv
;
181 /// Stores the state of a CFG block if it has been evaluated by the analysis.
182 /// The indices correspond to the block IDs.
183 llvm::ArrayRef
<std::optional
<TypeErasedDataflowAnalysisState
>> BlockStates
;
186 class PrettyStackTraceAnalysis
: public llvm::PrettyStackTraceEntry
{
188 PrettyStackTraceAnalysis(const ControlFlowContext
&CFCtx
, const char *Message
)
189 : CFCtx(CFCtx
), Message(Message
) {}
191 void print(raw_ostream
&OS
) const override
{
192 OS
<< Message
<< "\n";
194 CFCtx
.getDecl().dump(OS
);
196 CFCtx
.getCFG().print(OS
, LangOptions(), false);
200 const ControlFlowContext
&CFCtx
;
204 class PrettyStackTraceCFGElement
: public llvm::PrettyStackTraceEntry
{
206 PrettyStackTraceCFGElement(const CFGElement
&Element
, int BlockIdx
,
207 int ElementIdx
, const char *Message
)
208 : Element(Element
), BlockIdx(BlockIdx
), ElementIdx(ElementIdx
),
211 void print(raw_ostream
&OS
) const override
{
212 OS
<< Message
<< ": Element [B" << BlockIdx
<< "." << ElementIdx
<< "]\n";
213 if (auto Stmt
= Element
.getAs
<CFGStmt
>()) {
215 ASTDumper
Dumper(OS
, false);
216 Dumper
.Visit(Stmt
->getStmt());
221 const CFGElement
&Element
;
227 // Builds a joined TypeErasedDataflowAnalysisState from 0 or more sources,
228 // each of which may be owned (built as part of the join) or external (a
229 // reference to an Environment that will outlive the builder).
230 // Avoids unneccesary copies of the environment.
231 class JoinedStateBuilder
{
233 std::vector
<const TypeErasedDataflowAnalysisState
*> All
;
234 std::deque
<TypeErasedDataflowAnalysisState
> Owned
;
236 TypeErasedDataflowAnalysisState
237 join(const TypeErasedDataflowAnalysisState
&L
,
238 const TypeErasedDataflowAnalysisState
&R
) {
239 return {AC
.Analysis
.joinTypeErased(L
.Lattice
, R
.Lattice
),
240 Environment::join(L
.Env
, R
.Env
, AC
.Analysis
)};
244 JoinedStateBuilder(AnalysisContext
&AC
) : AC(AC
) {}
246 void addOwned(TypeErasedDataflowAnalysisState State
) {
247 Owned
.push_back(std::move(State
));
248 All
.push_back(&Owned
.back());
250 void addUnowned(const TypeErasedDataflowAnalysisState
&State
) {
251 All
.push_back(&State
);
253 TypeErasedDataflowAnalysisState
take() && {
255 // FIXME: Consider passing `Block` to Analysis.typeErasedInitialElement
256 // to enable building analyses like computation of dominators that
257 // initialize the state of each basic block differently.
258 return {AC
.Analysis
.typeErasedInitialElement(), AC
.InitEnv
.fork()};
260 return Owned
.empty() ? All
.front()->fork() : std::move(Owned
.front());
262 auto Result
= join(*All
[0], *All
[1]);
263 for (unsigned I
= 2; I
< All
.size(); ++I
)
264 Result
= join(Result
, *All
[I
]);
271 /// Computes the input state for a given basic block by joining the output
272 /// states of its predecessors.
276 /// All predecessors of `Block` except those with loop back edges must have
277 /// already been transferred. States in `AC.BlockStates` that are set to
278 /// `std::nullopt` represent basic blocks that are not evaluated yet.
279 static TypeErasedDataflowAnalysisState
280 computeBlockInputState(const CFGBlock
&Block
, AnalysisContext
&AC
) {
281 std::vector
<const CFGBlock
*> Preds(Block
.pred_begin(), Block
.pred_end());
282 if (Block
.getTerminator().isTemporaryDtorsBranch()) {
283 // This handles a special case where the code that produced the CFG includes
284 // a conditional operator with a branch that constructs a temporary and
285 // calls a destructor annotated as noreturn. The CFG models this as follows:
287 // B1 (contains the condition of the conditional operator) - succs: B2, B3
288 // B2 (contains code that does not call a noreturn destructor) - succs: B4
289 // B3 (contains code that calls a noreturn destructor) - succs: B4
290 // B4 (has temporary destructor terminator) - succs: B5, B6
291 // B5 (noreturn block that is associated with the noreturn destructor call)
292 // B6 (contains code that follows the conditional operator statement)
294 // The first successor (B5 above) of a basic block with a temporary
295 // destructor terminator (B4 above) is the block that evaluates the
296 // destructor. If that block has a noreturn element then the predecessor
297 // block that constructed the temporary object (B3 above) is effectively a
298 // noreturn block and its state should not be used as input for the state
299 // of the block that has a temporary destructor terminator (B4 above). This
300 // holds regardless of which branch of the ternary operator calls the
301 // noreturn destructor. However, it doesn't cases where a nested ternary
302 // operator includes a branch that contains a noreturn destructor call.
304 // See `NoreturnDestructorTest` for concrete examples.
305 if (Block
.succ_begin()->getReachableBlock() != nullptr &&
306 Block
.succ_begin()->getReachableBlock()->hasNoReturnElement()) {
307 auto &StmtToBlock
= AC
.CFCtx
.getStmtToBlock();
308 auto StmtBlock
= StmtToBlock
.find(Block
.getTerminatorStmt());
309 assert(StmtBlock
!= StmtToBlock
.end());
310 llvm::erase(Preds
, StmtBlock
->getSecond());
314 JoinedStateBuilder
Builder(AC
);
315 for (const CFGBlock
*Pred
: Preds
) {
316 // Skip if the `Block` is unreachable or control flow cannot get past it.
317 if (!Pred
|| Pred
->hasNoReturnElement())
320 // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a
321 // loop back edge to `Block`.
322 const std::optional
<TypeErasedDataflowAnalysisState
> &MaybePredState
=
323 AC
.BlockStates
[Pred
->getBlockID()];
327 if (AC
.Analysis
.builtinOptions()) {
328 if (const Stmt
*PredTerminatorStmt
= Pred
->getTerminatorStmt()) {
329 // We have a terminator: we need to mutate an environment to describe
330 // when the terminator is taken. Copy now.
331 TypeErasedDataflowAnalysisState Copy
= MaybePredState
->fork();
333 const StmtToEnvMap
StmtToEnv(AC
.CFCtx
, AC
.BlockStates
);
334 auto [Cond
, CondValue
] =
335 TerminatorVisitor(StmtToEnv
, Copy
.Env
,
336 blockIndexInPredecessor(*Pred
, Block
))
337 .Visit(PredTerminatorStmt
);
339 // FIXME: Call transferBranchTypeErased even if BuiltinTransferOpts
341 AC
.Analysis
.transferBranchTypeErased(CondValue
, Cond
, Copy
.Lattice
,
343 Builder
.addOwned(std::move(Copy
));
347 Builder
.addUnowned(*MaybePredState
);
349 return std::move(Builder
).take();
352 /// Built-in transfer function for `CFGStmt`.
354 builtinTransferStatement(const CFGStmt
&Elt
,
355 TypeErasedDataflowAnalysisState
&InputState
,
356 AnalysisContext
&AC
) {
357 const Stmt
*S
= Elt
.getStmt();
358 assert(S
!= nullptr);
359 transfer(StmtToEnvMap(AC
.CFCtx
, AC
.BlockStates
), *S
, InputState
.Env
);
362 /// Built-in transfer function for `CFGInitializer`.
364 builtinTransferInitializer(const CFGInitializer
&Elt
,
365 TypeErasedDataflowAnalysisState
&InputState
) {
366 const CXXCtorInitializer
*Init
= Elt
.getInitializer();
367 assert(Init
!= nullptr);
369 auto &Env
= InputState
.Env
;
370 auto &ThisLoc
= *Env
.getThisPointeeStorageLocation();
372 if (!Init
->isAnyMemberInitializer())
373 // FIXME: Handle base initialization
376 auto *InitExpr
= Init
->getInit();
377 assert(InitExpr
!= nullptr);
379 const FieldDecl
*Member
= nullptr;
380 RecordStorageLocation
*ParentLoc
= &ThisLoc
;
381 StorageLocation
*MemberLoc
= nullptr;
382 if (Init
->isMemberInitializer()) {
383 Member
= Init
->getMember();
384 MemberLoc
= ThisLoc
.getChild(*Member
);
386 IndirectFieldDecl
*IndirectField
= Init
->getIndirectMember();
387 assert(IndirectField
!= nullptr);
388 MemberLoc
= &ThisLoc
;
389 for (const auto *I
: IndirectField
->chain()) {
390 Member
= cast
<FieldDecl
>(I
);
391 ParentLoc
= cast
<RecordStorageLocation
>(MemberLoc
);
392 MemberLoc
= ParentLoc
->getChild(*Member
);
395 assert(Member
!= nullptr);
396 assert(MemberLoc
!= nullptr);
398 // FIXME: Instead of these case distinctions, we would ideally want to be able
399 // to simply use `Environment::createObject()` here, the same way that we do
400 // this in `TransferVisitor::VisitInitListExpr()`. However, this would require
401 // us to be able to build a list of fields that we then use to initialize an
402 // `RecordStorageLocation` -- and the problem is that, when we get here,
403 // the `RecordStorageLocation` already exists. We should explore if there's
404 // anything that we can do to change this.
405 if (Member
->getType()->isReferenceType()) {
406 auto *InitExprLoc
= Env
.getStorageLocation(*InitExpr
);
407 if (InitExprLoc
== nullptr)
410 ParentLoc
->setChild(*Member
, InitExprLoc
);
411 } else if (auto *InitExprVal
= Env
.getValue(*InitExpr
)) {
412 if (Member
->getType()->isRecordType()) {
413 auto *InitValStruct
= cast
<RecordValue
>(InitExprVal
);
414 // FIXME: Rather than performing a copy here, we should really be
415 // initializing the field in place. This would require us to propagate the
416 // storage location of the field to the AST node that creates the
418 copyRecord(InitValStruct
->getLoc(),
419 *cast
<RecordStorageLocation
>(MemberLoc
), Env
);
421 Env
.setValue(*MemberLoc
, *InitExprVal
);
426 static void builtinTransfer(const CFGElement
&Elt
,
427 TypeErasedDataflowAnalysisState
&State
,
428 AnalysisContext
&AC
) {
429 switch (Elt
.getKind()) {
430 case CFGElement::Statement
:
431 builtinTransferStatement(Elt
.castAs
<CFGStmt
>(), State
, AC
);
433 case CFGElement::Initializer
:
434 builtinTransferInitializer(Elt
.castAs
<CFGInitializer
>(), State
);
436 case CFGElement::LifetimeEnds
:
437 // Removing declarations when their lifetime ends serves two purposes:
438 // - Eliminate unnecessary clutter from `Environment::DeclToLoc`
439 // - Allow us to assert that, when joining two `Environment`s, the two
440 // `DeclToLoc` maps never contain entries that map the same declaration to
441 // different storage locations.
442 if (const ValueDecl
*VD
= Elt
.castAs
<CFGLifetimeEnds
>().getVarDecl())
443 State
.Env
.removeDecl(*VD
);
446 // FIXME: Evaluate other kinds of `CFGElement`
451 /// Transfers `State` by evaluating each element in the `Block` based on the
452 /// `AC.Analysis` specified.
454 /// Built-in transfer functions (if the option for `ApplyBuiltinTransfer` is set
455 /// by the analysis) will be applied to the element before evaluation by the
456 /// user-specified analysis.
457 /// `PostVisitCFG` (if provided) will be applied to the element after evaluation
458 /// by the user-specified analysis.
459 static TypeErasedDataflowAnalysisState
460 transferCFGBlock(const CFGBlock
&Block
, AnalysisContext
&AC
,
461 std::function
<void(const CFGElement
&,
462 const TypeErasedDataflowAnalysisState
&)>
463 PostVisitCFG
= nullptr) {
464 AC
.Log
.enterBlock(Block
, PostVisitCFG
!= nullptr);
465 auto State
= computeBlockInputState(Block
, AC
);
466 AC
.Log
.recordState(State
);
468 for (const auto &Element
: Block
) {
469 PrettyStackTraceCFGElement
CrashInfo(Element
, Block
.getBlockID(),
470 ElementIdx
++, "transferCFGBlock");
472 AC
.Log
.enterElement(Element
);
474 if (AC
.Analysis
.builtinOptions()) {
475 builtinTransfer(Element
, State
, AC
);
478 // User-provided analysis
479 AC
.Analysis
.transferTypeErased(Element
, State
.Lattice
, State
.Env
);
483 PostVisitCFG(Element
, State
);
485 AC
.Log
.recordState(State
);
490 llvm::Expected
<std::vector
<std::optional
<TypeErasedDataflowAnalysisState
>>>
491 runTypeErasedDataflowAnalysis(
492 const ControlFlowContext
&CFCtx
, TypeErasedDataflowAnalysis
&Analysis
,
493 const Environment
&InitEnv
,
494 std::function
<void(const CFGElement
&,
495 const TypeErasedDataflowAnalysisState
&)>
497 PrettyStackTraceAnalysis
CrashInfo(CFCtx
, "runTypeErasedDataflowAnalysis");
499 const clang::CFG
&CFG
= CFCtx
.getCFG();
500 PostOrderCFGView
POV(&CFG
);
501 ForwardDataflowWorklist
Worklist(CFG
, &POV
);
503 std::vector
<std::optional
<TypeErasedDataflowAnalysisState
>> BlockStates(
506 // The entry basic block doesn't contain statements so it can be skipped.
507 const CFGBlock
&Entry
= CFG
.getEntry();
508 BlockStates
[Entry
.getBlockID()] = {Analysis
.typeErasedInitialElement(),
510 Worklist
.enqueueSuccessors(&Entry
);
512 AnalysisContext
AC(CFCtx
, Analysis
, InitEnv
, BlockStates
);
514 // Bugs in lattices and transfer functions can prevent the analysis from
515 // converging. To limit the damage (infinite loops) that these bugs can cause,
516 // limit the number of iterations.
517 // FIXME: Consider making the maximum number of iterations configurable.
518 // FIXME: Consider restricting the number of backedges followed, rather than
520 // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number
521 // of iterations, number of functions that time out, etc.
522 static constexpr uint32_t MaxAverageVisitsPerBlock
= 4;
523 static constexpr uint32_t AbsoluteMaxIterations
= 1 << 16;
524 const uint32_t RelativeMaxIterations
=
525 MaxAverageVisitsPerBlock
* BlockStates
.size();
526 const uint32_t MaxIterations
=
527 std::min(RelativeMaxIterations
, AbsoluteMaxIterations
);
528 uint32_t Iterations
= 0;
529 while (const CFGBlock
*Block
= Worklist
.dequeue()) {
530 LLVM_DEBUG(llvm::dbgs()
531 << "Processing Block " << Block
->getBlockID() << "\n");
532 if (++Iterations
> MaxIterations
) {
533 return llvm::createStringError(std::errc::timed_out
,
534 "maximum number of iterations reached");
537 const std::optional
<TypeErasedDataflowAnalysisState
> &OldBlockState
=
538 BlockStates
[Block
->getBlockID()];
539 TypeErasedDataflowAnalysisState NewBlockState
=
540 transferCFGBlock(*Block
, AC
);
542 llvm::errs() << "New Env:\n";
543 NewBlockState
.Env
.dump();
548 llvm::errs() << "Old Env:\n";
549 OldBlockState
->Env
.dump();
551 if (isBackedgeNode(*Block
)) {
552 LatticeJoinEffect Effect1
= Analysis
.widenTypeErased(
553 NewBlockState
.Lattice
, OldBlockState
->Lattice
);
554 LatticeJoinEffect Effect2
=
555 NewBlockState
.Env
.widen(OldBlockState
->Env
, Analysis
);
556 if (Effect1
== LatticeJoinEffect::Unchanged
&&
557 Effect2
== LatticeJoinEffect::Unchanged
) {
558 // The state of `Block` didn't change from widening so there's no need
559 // to revisit its successors.
560 AC
.Log
.blockConverged();
563 } else if (Analysis
.isEqualTypeErased(OldBlockState
->Lattice
,
564 NewBlockState
.Lattice
) &&
565 OldBlockState
->Env
.equivalentTo(NewBlockState
.Env
, Analysis
)) {
566 // The state of `Block` didn't change after transfer so there's no need
567 // to revisit its successors.
568 AC
.Log
.blockConverged();
573 BlockStates
[Block
->getBlockID()] = std::move(NewBlockState
);
575 // Do not add unreachable successor blocks to `Worklist`.
576 if (Block
->hasNoReturnElement())
579 Worklist
.enqueueSuccessors(Block
);
581 // FIXME: Consider evaluating unreachable basic blocks (those that have a
582 // state set to `std::nullopt` at this point) to also analyze dead code.
585 for (const CFGBlock
*Block
: CFCtx
.getCFG()) {
586 // Skip blocks that were not evaluated.
587 if (!BlockStates
[Block
->getBlockID()])
589 transferCFGBlock(*Block
, AC
, PostVisitCFG
);
593 return std::move(BlockStates
);
596 } // namespace dataflow