1 //===- ExplodedGraph.cpp - Local, Path-Sens. "Exploded Graph" -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the template classes ExplodedNode and ExplodedGraph,
10 // which represent a path-sensitive, intra-procedural "exploded graph."
12 //===----------------------------------------------------------------------===//
14 #include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
15 #include "clang/AST/Expr.h"
16 #include "clang/AST/ExprObjC.h"
17 #include "clang/AST/ParentMap.h"
18 #include "clang/AST/Stmt.h"
19 #include "clang/Analysis/CFGStmtMap.h"
20 #include "clang/Analysis/ProgramPoint.h"
21 #include "clang/Analysis/Support/BumpVector.h"
22 #include "clang/Basic/LLVM.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
24 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
26 #include "llvm/ADT/DenseSet.h"
27 #include "llvm/ADT/FoldingSet.h"
28 #include "llvm/ADT/PointerUnion.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/Support/Casting.h"
35 using namespace clang
;
38 //===----------------------------------------------------------------------===//
40 //===----------------------------------------------------------------------===//
42 ExplodedGraph::ExplodedGraph() = default;
44 ExplodedGraph::~ExplodedGraph() = default;
46 //===----------------------------------------------------------------------===//
48 //===----------------------------------------------------------------------===//
50 bool ExplodedGraph::isInterestingLValueExpr(const Expr
*Ex
) {
53 return isa
<DeclRefExpr
, MemberExpr
, ObjCIvarRefExpr
, ArraySubscriptExpr
>(Ex
);
56 bool ExplodedGraph::shouldCollect(const ExplodedNode
*node
) {
57 // First, we only consider nodes for reclamation of the following
60 // (1) 1 predecessor (that has one successor)
61 // (2) 1 successor (that has one predecessor)
63 // If a node has no successor it is on the "frontier", while a node
64 // with no predecessor is a root.
66 // After these prerequisites, we discard all "filler" nodes that
67 // are used only for intermediate processing, and are not essential
68 // for analyzer history:
70 // (a) PreStmtPurgeDeadSymbols
72 // We then discard all other nodes where *all* of the following conditions
75 // (3) The ProgramPoint is for a PostStmt, but not a PostStore.
76 // (4) There is no 'tag' for the ProgramPoint.
77 // (5) The 'store' is the same as the predecessor.
78 // (6) The 'GDM' is the same as the predecessor.
79 // (7) The LocationContext is the same as the predecessor.
80 // (8) Expressions that are *not* lvalue expressions.
81 // (9) The PostStmt isn't for a non-consumed Stmt or Expr.
82 // (10) The successor is neither a CallExpr StmtPoint nor a CallEnter or
83 // PreImplicitCall (so that we would be able to find it when retrying a
84 // call with no inlining).
85 // FIXME: It may be safe to reclaim PreCall and PostCall nodes as well.
87 // Conditions 1 and 2.
88 if (node
->pred_size() != 1 || node
->succ_size() != 1)
91 const ExplodedNode
*pred
= *(node
->pred_begin());
92 if (pred
->succ_size() != 1)
95 const ExplodedNode
*succ
= *(node
->succ_begin());
96 if (succ
->pred_size() != 1)
99 // Now reclaim any nodes that are (by definition) not essential to
100 // analysis history and are not consulted by any client code.
101 ProgramPoint progPoint
= node
->getLocation();
102 if (progPoint
.getAs
<PreStmtPurgeDeadSymbols
>())
103 return !progPoint
.getTag();
106 if (!progPoint
.getAs
<PostStmt
>() || progPoint
.getAs
<PostStore
>())
110 if (progPoint
.getTag())
113 // Conditions 5, 6, and 7.
114 ProgramStateRef state
= node
->getState();
115 ProgramStateRef pred_state
= pred
->getState();
116 if (state
->store
!= pred_state
->store
|| state
->GDM
!= pred_state
->GDM
||
117 progPoint
.getLocationContext() != pred
->getLocationContext())
120 // All further checks require expressions. As per #3, we know that we have
122 const Expr
*Ex
= dyn_cast
<Expr
>(progPoint
.castAs
<PostStmt
>().getStmt());
127 // Do not collect nodes for "interesting" lvalue expressions since they are
128 // used extensively for generating path diagnostics.
129 if (isInterestingLValueExpr(Ex
))
133 // Do not collect nodes for non-consumed Stmt or Expr to ensure precise
134 // diagnostic generation; specifically, so that we could anchor arrows
135 // pointing to the beginning of statements (as written in code).
136 const ParentMap
&PM
= progPoint
.getLocationContext()->getParentMap();
137 if (!PM
.isConsumedExpr(Ex
))
141 const ProgramPoint SuccLoc
= succ
->getLocation();
142 if (std::optional
<StmtPoint
> SP
= SuccLoc
.getAs
<StmtPoint
>())
143 if (CallEvent::isCallStmt(SP
->getStmt()))
146 // Condition 10, continuation.
147 if (SuccLoc
.getAs
<CallEnter
>() || SuccLoc
.getAs
<PreImplicitCall
>())
153 void ExplodedGraph::collectNode(ExplodedNode
*node
) {
154 // Removing a node means:
155 // (a) changing the predecessors successor to the successor of this node
156 // (b) changing the successors predecessor to the predecessor of this node
157 // (c) Putting 'node' onto freeNodes.
158 assert(node
->pred_size() == 1 || node
->succ_size() == 1);
159 ExplodedNode
*pred
= *(node
->pred_begin());
160 ExplodedNode
*succ
= *(node
->succ_begin());
161 pred
->replaceSuccessor(succ
);
162 succ
->replacePredecessor(pred
);
163 FreeNodes
.push_back(node
);
164 Nodes
.RemoveNode(node
);
166 node
->~ExplodedNode();
169 void ExplodedGraph::reclaimRecentlyAllocatedNodes() {
170 if (ChangedNodes
.empty())
173 // Only periodically reclaim nodes so that we can build up a set of
174 // nodes that meet the reclamation criteria. Freshly created nodes
175 // by definition have no successor, and thus cannot be reclaimed (see below).
176 assert(ReclaimCounter
> 0);
177 if (--ReclaimCounter
!= 0)
179 ReclaimCounter
= ReclaimNodeInterval
;
181 for (const auto node
: ChangedNodes
)
182 if (shouldCollect(node
))
184 ChangedNodes
.clear();
187 //===----------------------------------------------------------------------===//
189 //===----------------------------------------------------------------------===//
191 // An NodeGroup's storage type is actually very much like a TinyPtrVector:
192 // it can be either a pointer to a single ExplodedNode, or a pointer to a
193 // BumpVector allocated with the ExplodedGraph's allocator. This allows the
194 // common case of single-node NodeGroups to be implemented with no extra memory.
196 // Consequently, each of the NodeGroup methods have up to four cases to handle:
197 // 1. The flag is set and this group does not actually contain any nodes.
198 // 2. The group is empty, in which case the storage value is null.
199 // 3. The group contains a single node.
200 // 4. The group contains more than one node.
201 using ExplodedNodeVector
= BumpVector
<ExplodedNode
*>;
202 using GroupStorage
= llvm::PointerUnion
<ExplodedNode
*, ExplodedNodeVector
*>;
204 void ExplodedNode::addPredecessor(ExplodedNode
*V
, ExplodedGraph
&G
) {
205 assert(!V
->isSink());
207 V
->Succs
.addNode(this, G
);
210 void ExplodedNode::NodeGroup::replaceNode(ExplodedNode
*node
) {
213 GroupStorage
&Storage
= reinterpret_cast<GroupStorage
&>(P
);
214 assert(isa
<ExplodedNode
*>(Storage
));
216 assert(isa
<ExplodedNode
*>(Storage
));
219 void ExplodedNode::NodeGroup::addNode(ExplodedNode
*N
, ExplodedGraph
&G
) {
222 GroupStorage
&Storage
= reinterpret_cast<GroupStorage
&>(P
);
223 if (Storage
.isNull()) {
225 assert(isa
<ExplodedNode
*>(Storage
));
229 ExplodedNodeVector
*V
= Storage
.dyn_cast
<ExplodedNodeVector
*>();
232 // Switch from single-node to multi-node representation.
233 auto *Old
= cast
<ExplodedNode
*>(Storage
);
235 BumpVectorContext
&Ctx
= G
.getNodeAllocator();
236 V
= new (G
.getAllocator()) ExplodedNodeVector(Ctx
, 4);
237 V
->push_back(Old
, Ctx
);
241 assert(isa
<ExplodedNodeVector
*>(Storage
));
244 V
->push_back(N
, G
.getNodeAllocator());
247 unsigned ExplodedNode::NodeGroup::size() const {
251 const GroupStorage
&Storage
= reinterpret_cast<const GroupStorage
&>(P
);
252 if (Storage
.isNull())
254 if (ExplodedNodeVector
*V
= Storage
.dyn_cast
<ExplodedNodeVector
*>())
259 ExplodedNode
* const *ExplodedNode::NodeGroup::begin() const {
263 const GroupStorage
&Storage
= reinterpret_cast<const GroupStorage
&>(P
);
264 if (Storage
.isNull())
266 if (ExplodedNodeVector
*V
= Storage
.dyn_cast
<ExplodedNodeVector
*>())
268 return Storage
.getAddrOfPtr1();
271 ExplodedNode
* const *ExplodedNode::NodeGroup::end() const {
275 const GroupStorage
&Storage
= reinterpret_cast<const GroupStorage
&>(P
);
276 if (Storage
.isNull())
278 if (ExplodedNodeVector
*V
= Storage
.dyn_cast
<ExplodedNodeVector
*>())
280 return Storage
.getAddrOfPtr1() + 1;
283 bool ExplodedNode::isTrivial() const {
284 return pred_size() == 1 && succ_size() == 1 &&
285 getFirstPred()->getState()->getID() == getState()->getID() &&
286 getFirstPred()->succ_size() == 1;
289 const CFGBlock
*ExplodedNode::getCFGBlock() const {
290 ProgramPoint P
= getLocation();
291 if (auto BEP
= P
.getAs
<BlockEntrance
>())
292 return BEP
->getBlock();
294 // Find the node's current statement in the CFG.
295 // FIXME: getStmtForDiagnostics() does nasty things in order to provide
296 // a valid statement for body farms, do we need this behavior here?
297 if (const Stmt
*S
= getStmtForDiagnostics())
298 return getLocationContext()
299 ->getAnalysisDeclContext()
306 static const LocationContext
*
307 findTopAutosynthesizedParentContext(const LocationContext
*LC
) {
308 assert(LC
->getAnalysisDeclContext()->isBodyAutosynthesized());
309 const LocationContext
*ParentLC
= LC
->getParent();
310 assert(ParentLC
&& "We don't start analysis from autosynthesized code");
311 while (ParentLC
->getAnalysisDeclContext()->isBodyAutosynthesized()) {
313 ParentLC
= LC
->getParent();
314 assert(ParentLC
&& "We don't start analysis from autosynthesized code");
319 const Stmt
*ExplodedNode::getStmtForDiagnostics() const {
320 // We cannot place diagnostics on autosynthesized code.
321 // Put them onto the call site through which we jumped into autosynthesized
322 // code for the first time.
323 const LocationContext
*LC
= getLocationContext();
324 if (LC
->getAnalysisDeclContext()->isBodyAutosynthesized()) {
325 // It must be a stack frame because we only autosynthesize functions.
326 return cast
<StackFrameContext
>(findTopAutosynthesizedParentContext(LC
))
329 // Otherwise, see if the node's program point directly points to a statement.
330 // FIXME: Refactor into a ProgramPoint method?
331 ProgramPoint P
= getLocation();
332 if (auto SP
= P
.getAs
<StmtPoint
>())
333 return SP
->getStmt();
334 if (auto BE
= P
.getAs
<BlockEdge
>())
335 return BE
->getSrc()->getTerminatorStmt();
336 if (auto CE
= P
.getAs
<CallEnter
>())
337 return CE
->getCallExpr();
338 if (auto CEE
= P
.getAs
<CallExitEnd
>())
339 return CEE
->getCalleeContext()->getCallSite();
340 if (auto PIPP
= P
.getAs
<PostInitializer
>())
341 return PIPP
->getInitializer()->getInit();
342 if (auto CEB
= P
.getAs
<CallExitBegin
>())
343 return CEB
->getReturnStmt();
344 if (auto FEP
= P
.getAs
<FunctionExitPoint
>())
345 return FEP
->getStmt();
350 const Stmt
*ExplodedNode::getNextStmtForDiagnostics() const {
351 for (const ExplodedNode
*N
= getFirstSucc(); N
; N
= N
->getFirstSucc()) {
352 if (N
->getLocation().isPurgeKind())
354 if (const Stmt
*S
= N
->getStmtForDiagnostics()) {
355 // Check if the statement is '?' or '&&'/'||'. These are "merges",
356 // not actual statement points.
357 switch (S
->getStmtClass()) {
358 case Stmt::ChooseExprClass
:
359 case Stmt::BinaryConditionalOperatorClass
:
360 case Stmt::ConditionalOperatorClass
:
362 case Stmt::BinaryOperatorClass
: {
363 BinaryOperatorKind Op
= cast
<BinaryOperator
>(S
)->getOpcode();
364 if (Op
== BO_LAnd
|| Op
== BO_LOr
)
371 // We found the statement, so return it.
379 const Stmt
*ExplodedNode::getPreviousStmtForDiagnostics() const {
380 for (const ExplodedNode
*N
= getFirstPred(); N
; N
= N
->getFirstPred())
381 if (const Stmt
*S
= N
->getStmtForDiagnostics(); S
&& !isa
<CompoundStmt
>(S
))
387 const Stmt
*ExplodedNode::getCurrentOrPreviousStmtForDiagnostics() const {
388 if (const Stmt
*S
= getStmtForDiagnostics())
391 return getPreviousStmtForDiagnostics();
394 ExplodedNode
*ExplodedGraph::getNode(const ProgramPoint
&L
,
395 ProgramStateRef State
,
398 // Profile 'State' to determine if we already have an existing node.
399 llvm::FoldingSetNodeID profile
;
400 void *InsertPos
= nullptr;
402 NodeTy::Profile(profile
, L
, State
, IsSink
);
403 NodeTy
* V
= Nodes
.FindNodeOrInsertPos(profile
, InsertPos
);
406 if (!FreeNodes
.empty()) {
407 V
= FreeNodes
.back();
408 FreeNodes
.pop_back();
411 // Allocate a new node.
412 V
= getAllocator().Allocate
<NodeTy
>();
416 new (V
) NodeTy(L
, State
, NumNodes
, IsSink
);
418 if (ReclaimNodeInterval
)
419 ChangedNodes
.push_back(V
);
421 // Insert the node into the node set and return it.
422 Nodes
.InsertNode(V
, InsertPos
);
424 if (IsNew
) *IsNew
= true;
427 if (IsNew
) *IsNew
= false;
432 ExplodedNode
*ExplodedGraph::createUncachedNode(const ProgramPoint
&L
,
433 ProgramStateRef State
,
436 NodeTy
*V
= getAllocator().Allocate
<NodeTy
>();
437 new (V
) NodeTy(L
, State
, Id
, IsSink
);
441 std::unique_ptr
<ExplodedGraph
>
442 ExplodedGraph::trim(ArrayRef
<const NodeTy
*> Sinks
,
443 InterExplodedGraphMap
*ForwardMap
,
444 InterExplodedGraphMap
*InverseMap
) const {
448 using Pass1Ty
= llvm::DenseSet
<const ExplodedNode
*>;
451 using Pass2Ty
= InterExplodedGraphMap
;
452 InterExplodedGraphMap Pass2Scratch
;
453 Pass2Ty
&Pass2
= ForwardMap
? *ForwardMap
: Pass2Scratch
;
455 SmallVector
<const ExplodedNode
*, 10> WL1
, WL2
;
457 // ===- Pass 1 (reverse DFS) -===
458 for (const auto Sink
: Sinks
)
462 // Process the first worklist until it is empty.
463 while (!WL1
.empty()) {
464 const ExplodedNode
*N
= WL1
.pop_back_val();
466 // Have we already visited this node? If so, continue to the next one.
467 if (!Pass1
.insert(N
).second
)
470 // If this is a root enqueue it to the second worklist.
471 if (N
->Preds
.empty()) {
476 // Visit our predecessors and enqueue them.
477 WL1
.append(N
->Preds
.begin(), N
->Preds
.end());
480 // We didn't hit a root? Return with a null pointer for the new graph.
484 // Create an empty graph.
485 std::unique_ptr
<ExplodedGraph
> G
= MakeEmptyGraph();
487 // ===- Pass 2 (forward DFS to construct the new graph) -===
488 while (!WL2
.empty()) {
489 const ExplodedNode
*N
= WL2
.pop_back_val();
491 // Skip this node if we have already processed it.
492 if (Pass2
.contains(N
))
495 // Create the corresponding node in the new graph and record the mapping
496 // from the old node to the new node.
497 ExplodedNode
*NewN
= G
->createUncachedNode(N
->getLocation(), N
->State
,
498 N
->getID(), N
->isSink());
501 // Also record the reverse mapping from the new node to the old node.
502 if (InverseMap
) (*InverseMap
)[NewN
] = N
;
504 // If this node is a root, designate it as such in the graph.
505 if (N
->Preds
.empty())
508 // In the case that some of the intended predecessors of NewN have already
509 // been created, we should hook them up as predecessors.
511 // Walk through the predecessors of 'N' and hook up their corresponding
512 // nodes in the new graph (if any) to the freshly created node.
513 for (const ExplodedNode
*Pred
: N
->Preds
) {
514 Pass2Ty::iterator PI
= Pass2
.find(Pred
);
515 if (PI
== Pass2
.end())
518 NewN
->addPredecessor(const_cast<ExplodedNode
*>(PI
->second
), *G
);
521 // In the case that some of the intended successors of NewN have already
522 // been created, we should hook them up as successors. Otherwise, enqueue
523 // the new nodes from the original graph that should have nodes created
525 for (const ExplodedNode
*Succ
: N
->Succs
) {
526 Pass2Ty::iterator PI
= Pass2
.find(Succ
);
527 if (PI
!= Pass2
.end()) {
528 const_cast<ExplodedNode
*>(PI
->second
)->addPredecessor(NewN
, *G
);
532 // Enqueue nodes to the worklist that were marked during pass 1.
533 if (Pass1
.count(Succ
))