1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines an Environment class that is used by dataflow analyses
10 // that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 // program at given program points.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
20 #include "clang/Analysis/FlowSensitive/Value.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/DenseSet.h"
23 #include "llvm/ADT/MapVector.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/Support/ErrorHandling.h"
32 // FIXME: convert these to parameters of the analysis or environment. Current
33 // settings have been experimentaly validated, but only for a particular
35 static constexpr int MaxCompositeValueDepth
= 3;
36 static constexpr int MaxCompositeValueSize
= 1000;
38 /// Returns a map consisting of key-value entries that are present in both maps.
39 static llvm::DenseMap
<const ValueDecl
*, StorageLocation
*> intersectDeclToLoc(
40 const llvm::DenseMap
<const ValueDecl
*, StorageLocation
*> &DeclToLoc1
,
41 const llvm::DenseMap
<const ValueDecl
*, StorageLocation
*> &DeclToLoc2
) {
42 llvm::DenseMap
<const ValueDecl
*, StorageLocation
*> Result
;
43 for (auto &Entry
: DeclToLoc1
) {
44 auto It
= DeclToLoc2
.find(Entry
.first
);
45 if (It
!= DeclToLoc2
.end() && Entry
.second
== It
->second
)
46 Result
.insert({Entry
.first
, Entry
.second
});
51 // Whether to consider equivalent two values with an unknown relation.
53 // FIXME: this function is a hack enabling unsoundness to support
54 // convergence. Once we have widening support for the reference/pointer and
55 // struct built-in models, this should be unconditionally `false` (and inlined
56 // as such at its call sites).
57 static bool equateUnknownValues(Value::Kind K
) {
59 case Value::Kind::Integer
:
60 case Value::Kind::Pointer
:
61 case Value::Kind::Record
:
68 static bool compareDistinctValues(QualType Type
, Value
&Val1
,
69 const Environment
&Env1
, Value
&Val2
,
70 const Environment
&Env2
,
71 Environment::ValueModel
&Model
) {
72 // Note: Potentially costly, but, for booleans, we could check whether both
73 // can be proven equivalent in their respective environments.
75 // FIXME: move the reference/pointers logic from `areEquivalentValues` to here
76 // and implement separate, join/widen specific handling for
77 // reference/pointers.
78 switch (Model
.compare(Type
, Val1
, Env1
, Val2
, Env2
)) {
79 case ComparisonResult::Same
:
81 case ComparisonResult::Different
:
83 case ComparisonResult::Unknown
:
84 return equateUnknownValues(Val1
.getKind());
86 llvm_unreachable("All cases covered in switch");
89 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
90 /// respectively, of the same type `Type`. Merging generally produces a single
91 /// value that (soundly) approximates the two inputs, although the actual
92 /// meaning depends on `Model`.
93 static Value
*mergeDistinctValues(QualType Type
, Value
&Val1
,
94 const Environment
&Env1
, Value
&Val2
,
95 const Environment
&Env2
,
96 Environment
&MergedEnv
,
97 Environment::ValueModel
&Model
) {
98 // Join distinct boolean values preserving information about the constraints
99 // in the respective path conditions.
100 if (isa
<BoolValue
>(&Val1
) && isa
<BoolValue
>(&Val2
)) {
101 // FIXME: Checking both values should be unnecessary, since they should have
102 // a consistent shape. However, right now we can end up with BoolValue's in
103 // integer-typed variables due to our incorrect handling of
104 // boolean-to-integer casts (we just propagate the BoolValue to the result
105 // of the cast). So, a join can encounter an integer in one branch but a
106 // bool in the other.
109 // std::optional<bool> o;
111 // if (o.has_value())
114 auto &Expr1
= cast
<BoolValue
>(Val1
).formula();
115 auto &Expr2
= cast
<BoolValue
>(Val2
).formula();
116 auto &A
= MergedEnv
.arena();
117 auto &MergedVal
= A
.makeAtomRef(A
.makeAtom());
119 A
.makeOr(A
.makeAnd(A
.makeAtomRef(Env1
.getFlowConditionToken()),
120 A
.makeEquals(MergedVal
, Expr1
)),
121 A
.makeAnd(A
.makeAtomRef(Env2
.getFlowConditionToken()),
122 A
.makeEquals(MergedVal
, Expr2
))));
123 return &A
.makeBoolValue(MergedVal
);
126 Value
*MergedVal
= nullptr;
127 if (auto *RecordVal1
= dyn_cast
<RecordValue
>(&Val1
)) {
128 auto *RecordVal2
= cast
<RecordValue
>(&Val2
);
130 if (&RecordVal1
->getLoc() == &RecordVal2
->getLoc())
131 // `RecordVal1` and `RecordVal2` may have different properties associated
132 // with them. Create a new `RecordValue` with the same location but
133 // without any properties so that we soundly approximate both values. If a
134 // particular analysis needs to merge properties, it should do so in
135 // `DataflowAnalysis::merge()`.
136 MergedVal
= &MergedEnv
.create
<RecordValue
>(RecordVal1
->getLoc());
138 // If the locations for the two records are different, need to create a
139 // completely new value.
140 MergedVal
= MergedEnv
.createValue(Type
);
142 MergedVal
= MergedEnv
.createValue(Type
);
145 // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
146 // returns false to avoid storing unneeded values in `DACtx`.
148 if (Model
.merge(Type
, Val1
, Env1
, Val2
, Env2
, *MergedVal
, MergedEnv
))
154 // When widening does not change `Current`, return value will equal `&Prev`.
155 static Value
&widenDistinctValues(QualType Type
, Value
&Prev
,
156 const Environment
&PrevEnv
, Value
&Current
,
157 Environment
&CurrentEnv
,
158 Environment::ValueModel
&Model
) {
159 // Boolean-model widening.
160 if (auto *PrevBool
= dyn_cast
<BoolValue
>(&Prev
)) {
161 // If previous value was already Top, re-use that to (implicitly) indicate
162 // that no change occurred.
163 if (isa
<TopBoolValue
>(Prev
))
166 // We may need to widen to Top, but before we do so, check whether both
167 // values are implied to be either true or false in the current environment.
168 // In that case, we can simply return a literal instead.
169 auto &CurBool
= cast
<BoolValue
>(Current
);
170 bool TruePrev
= PrevEnv
.proves(PrevBool
->formula());
171 bool TrueCur
= CurrentEnv
.proves(CurBool
.formula());
172 if (TruePrev
&& TrueCur
)
173 return CurrentEnv
.getBoolLiteralValue(true);
174 if (!TruePrev
&& !TrueCur
&&
175 PrevEnv
.proves(PrevEnv
.arena().makeNot(PrevBool
->formula())) &&
176 CurrentEnv
.proves(CurrentEnv
.arena().makeNot(CurBool
.formula())))
177 return CurrentEnv
.getBoolLiteralValue(false);
179 return CurrentEnv
.makeTopBoolValue();
182 // FIXME: Add other built-in model widening.
184 // Custom-model widening.
185 if (auto *W
= Model
.widen(Type
, Prev
, PrevEnv
, Current
, CurrentEnv
))
188 return equateUnknownValues(Prev
.getKind()) ? Prev
: Current
;
191 // Returns whether the values in `Map1` and `Map2` compare equal for those
192 // keys that `Map1` and `Map2` have in common.
193 template <typename Key
>
194 bool compareKeyToValueMaps(const llvm::MapVector
<Key
, Value
*> &Map1
,
195 const llvm::MapVector
<Key
, Value
*> &Map2
,
196 const Environment
&Env1
, const Environment
&Env2
,
197 Environment::ValueModel
&Model
) {
198 for (auto &Entry
: Map1
) {
200 assert(K
!= nullptr);
202 Value
*Val
= Entry
.second
;
203 assert(Val
!= nullptr);
205 auto It
= Map2
.find(K
);
206 if (It
== Map2
.end())
208 assert(It
->second
!= nullptr);
210 if (!areEquivalentValues(*Val
, *It
->second
) &&
211 !compareDistinctValues(K
->getType(), *Val
, Env1
, *It
->second
, Env2
,
219 // Perform a join on two `LocToVal` maps.
220 static llvm::MapVector
<const StorageLocation
*, Value
*>
221 joinLocToVal(const llvm::MapVector
<const StorageLocation
*, Value
*> &LocToVal
,
222 const llvm::MapVector
<const StorageLocation
*, Value
*> &LocToVal2
,
223 const Environment
&Env1
, const Environment
&Env2
,
224 Environment
&JoinedEnv
, Environment::ValueModel
&Model
) {
225 llvm::MapVector
<const StorageLocation
*, Value
*> Result
;
226 for (auto &Entry
: LocToVal
) {
227 const StorageLocation
*Loc
= Entry
.first
;
228 assert(Loc
!= nullptr);
230 Value
*Val
= Entry
.second
;
231 assert(Val
!= nullptr);
233 auto It
= LocToVal2
.find(Loc
);
234 if (It
== LocToVal2
.end())
236 assert(It
->second
!= nullptr);
238 if (areEquivalentValues(*Val
, *It
->second
)) {
239 Result
.insert({Loc
, Val
});
243 if (Value
*MergedVal
= mergeDistinctValues(
244 Loc
->getType(), *Val
, Env1
, *It
->second
, Env2
, JoinedEnv
, Model
)) {
245 Result
.insert({Loc
, MergedVal
});
252 // Perform widening on either `LocToVal` or `ExprToVal`. `Key` must be either
253 // `const StorageLocation *` or `const Expr *`.
254 template <typename Key
>
255 llvm::MapVector
<Key
, Value
*>
256 widenKeyToValueMap(const llvm::MapVector
<Key
, Value
*> &CurMap
,
257 const llvm::MapVector
<Key
, Value
*> &PrevMap
,
258 Environment
&CurEnv
, const Environment
&PrevEnv
,
259 Environment::ValueModel
&Model
, LatticeJoinEffect
&Effect
) {
260 llvm::MapVector
<Key
, Value
*> WidenedMap
;
261 for (auto &Entry
: CurMap
) {
263 assert(K
!= nullptr);
265 Value
*Val
= Entry
.second
;
266 assert(Val
!= nullptr);
268 auto PrevIt
= PrevMap
.find(K
);
269 if (PrevIt
== PrevMap
.end())
271 assert(PrevIt
->second
!= nullptr);
273 if (areEquivalentValues(*Val
, *PrevIt
->second
)) {
274 WidenedMap
.insert({K
, Val
});
278 Value
&WidenedVal
= widenDistinctValues(K
->getType(), *PrevIt
->second
,
279 PrevEnv
, *Val
, CurEnv
, Model
);
280 WidenedMap
.insert({K
, &WidenedVal
});
281 if (&WidenedVal
!= PrevIt
->second
)
282 Effect
= LatticeJoinEffect::Changed
;
288 /// Initializes a global storage value.
289 static void insertIfGlobal(const Decl
&D
,
290 llvm::DenseSet
<const VarDecl
*> &Vars
) {
291 if (auto *V
= dyn_cast
<VarDecl
>(&D
))
292 if (V
->hasGlobalStorage())
296 static void insertIfFunction(const Decl
&D
,
297 llvm::DenseSet
<const FunctionDecl
*> &Funcs
) {
298 if (auto *FD
= dyn_cast
<FunctionDecl
>(&D
))
302 static MemberExpr
*getMemberForAccessor(const CXXMemberCallExpr
&C
) {
303 // Use getCalleeDecl instead of getMethodDecl in order to handle
304 // pointer-to-member calls.
305 const auto *MethodDecl
= dyn_cast_or_null
<CXXMethodDecl
>(C
.getCalleeDecl());
308 auto *Body
= dyn_cast_or_null
<CompoundStmt
>(MethodDecl
->getBody());
309 if (!Body
|| Body
->size() != 1)
311 if (auto *RS
= dyn_cast
<ReturnStmt
>(*Body
->body_begin()))
312 if (auto *Return
= RS
->getRetValue())
313 return dyn_cast
<MemberExpr
>(Return
->IgnoreParenImpCasts());
318 getFieldsGlobalsAndFuncs(const Decl
&D
, FieldSet
&Fields
,
319 llvm::DenseSet
<const VarDecl
*> &Vars
,
320 llvm::DenseSet
<const FunctionDecl
*> &Funcs
) {
321 insertIfGlobal(D
, Vars
);
322 insertIfFunction(D
, Funcs
);
323 if (const auto *Decomp
= dyn_cast
<DecompositionDecl
>(&D
))
324 for (const auto *B
: Decomp
->bindings())
325 if (auto *ME
= dyn_cast_or_null
<MemberExpr
>(B
->getBinding()))
326 // FIXME: should we be using `E->getFoundDecl()`?
327 if (const auto *FD
= dyn_cast
<FieldDecl
>(ME
->getMemberDecl()))
331 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields,
332 /// global variables and functions that are declared in or referenced from
335 getFieldsGlobalsAndFuncs(const Stmt
&S
, FieldSet
&Fields
,
336 llvm::DenseSet
<const VarDecl
*> &Vars
,
337 llvm::DenseSet
<const FunctionDecl
*> &Funcs
) {
338 for (auto *Child
: S
.children())
339 if (Child
!= nullptr)
340 getFieldsGlobalsAndFuncs(*Child
, Fields
, Vars
, Funcs
);
341 if (const auto *DefaultInit
= dyn_cast
<CXXDefaultInitExpr
>(&S
))
342 getFieldsGlobalsAndFuncs(*DefaultInit
->getExpr(), Fields
, Vars
, Funcs
);
344 if (auto *DS
= dyn_cast
<DeclStmt
>(&S
)) {
345 if (DS
->isSingleDecl())
346 getFieldsGlobalsAndFuncs(*DS
->getSingleDecl(), Fields
, Vars
, Funcs
);
348 for (auto *D
: DS
->getDeclGroup())
349 getFieldsGlobalsAndFuncs(*D
, Fields
, Vars
, Funcs
);
350 } else if (auto *E
= dyn_cast
<DeclRefExpr
>(&S
)) {
351 insertIfGlobal(*E
->getDecl(), Vars
);
352 insertIfFunction(*E
->getDecl(), Funcs
);
353 } else if (const auto *C
= dyn_cast
<CXXMemberCallExpr
>(&S
)) {
354 // If this is a method that returns a member variable but does nothing else,
355 // model the field of the return value.
356 if (MemberExpr
*E
= getMemberForAccessor(*C
))
357 if (const auto *FD
= dyn_cast
<FieldDecl
>(E
->getMemberDecl()))
359 } else if (auto *E
= dyn_cast
<MemberExpr
>(&S
)) {
360 // FIXME: should we be using `E->getFoundDecl()`?
361 const ValueDecl
*VD
= E
->getMemberDecl();
362 insertIfGlobal(*VD
, Vars
);
363 insertIfFunction(*VD
, Funcs
);
364 if (const auto *FD
= dyn_cast
<FieldDecl
>(VD
))
366 } else if (auto *InitList
= dyn_cast
<InitListExpr
>(&S
)) {
367 if (RecordDecl
*RD
= InitList
->getType()->getAsRecordDecl())
368 for (const auto *FD
: getFieldsForInitListExpr(RD
))
373 Environment::Environment(DataflowAnalysisContext
&DACtx
)
375 FlowConditionToken(DACtx
.arena().makeFlowConditionToken()) {}
377 Environment::Environment(DataflowAnalysisContext
&DACtx
,
378 const DeclContext
&DeclCtx
)
379 : Environment(DACtx
) {
380 CallStack
.push_back(&DeclCtx
);
383 void Environment::initialize() {
384 const DeclContext
*DeclCtx
= getDeclCtx();
385 if (DeclCtx
== nullptr)
388 if (const auto *FuncDecl
= dyn_cast
<FunctionDecl
>(DeclCtx
)) {
389 assert(FuncDecl
->getBody() != nullptr);
391 initFieldsGlobalsAndFuncs(FuncDecl
);
393 for (const auto *ParamDecl
: FuncDecl
->parameters()) {
394 assert(ParamDecl
!= nullptr);
395 setStorageLocation(*ParamDecl
, createObject(*ParamDecl
, nullptr));
399 if (const auto *MethodDecl
= dyn_cast
<CXXMethodDecl
>(DeclCtx
)) {
400 auto *Parent
= MethodDecl
->getParent();
401 assert(Parent
!= nullptr);
403 if (Parent
->isLambda()) {
404 for (auto Capture
: Parent
->captures()) {
405 if (Capture
.capturesVariable()) {
406 const auto *VarDecl
= Capture
.getCapturedVar();
407 assert(VarDecl
!= nullptr);
408 setStorageLocation(*VarDecl
, createObject(*VarDecl
, nullptr));
409 } else if (Capture
.capturesThis()) {
410 const auto *SurroundingMethodDecl
=
411 cast
<CXXMethodDecl
>(DeclCtx
->getNonClosureAncestor());
412 QualType ThisPointeeType
=
413 SurroundingMethodDecl
->getFunctionObjectParameterType();
414 setThisPointeeStorageLocation(
415 cast
<RecordValue
>(createValue(ThisPointeeType
))->getLoc());
418 } else if (MethodDecl
->isImplicitObjectMemberFunction()) {
419 QualType ThisPointeeType
= MethodDecl
->getFunctionObjectParameterType();
420 setThisPointeeStorageLocation(
421 cast
<RecordValue
>(createValue(ThisPointeeType
))->getLoc());
426 // FIXME: Add support for resetting globals after function calls to enable
427 // the implementation of sound analyses.
428 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl
*FuncDecl
) {
429 assert(FuncDecl
->getBody() != nullptr);
432 llvm::DenseSet
<const VarDecl
*> Vars
;
433 llvm::DenseSet
<const FunctionDecl
*> Funcs
;
435 // Look for global variable and field references in the
436 // constructor-initializers.
437 if (const auto *CtorDecl
= dyn_cast
<CXXConstructorDecl
>(FuncDecl
)) {
438 for (const auto *Init
: CtorDecl
->inits()) {
439 if (Init
->isMemberInitializer()) {
440 Fields
.insert(Init
->getMember());
441 } else if (Init
->isIndirectMemberInitializer()) {
442 for (const auto *I
: Init
->getIndirectMember()->chain())
443 Fields
.insert(cast
<FieldDecl
>(I
));
445 const Expr
*E
= Init
->getInit();
446 assert(E
!= nullptr);
447 getFieldsGlobalsAndFuncs(*E
, Fields
, Vars
, Funcs
);
449 // Add all fields mentioned in default member initializers.
450 for (const FieldDecl
*F
: CtorDecl
->getParent()->fields())
451 if (const auto *I
= F
->getInClassInitializer())
452 getFieldsGlobalsAndFuncs(*I
, Fields
, Vars
, Funcs
);
454 getFieldsGlobalsAndFuncs(*FuncDecl
->getBody(), Fields
, Vars
, Funcs
);
456 // These have to be added before the lines that follow to ensure that
457 // `create*` work correctly for structs.
458 DACtx
->addModeledFields(Fields
);
460 for (const VarDecl
*D
: Vars
) {
461 if (getStorageLocation(*D
) != nullptr)
464 setStorageLocation(*D
, createObject(*D
));
467 for (const FunctionDecl
*FD
: Funcs
) {
468 if (getStorageLocation(*FD
) != nullptr)
470 auto &Loc
= createStorageLocation(FD
->getType());
471 setStorageLocation(*FD
, Loc
);
475 Environment
Environment::fork() const {
476 Environment
Copy(*this);
477 Copy
.FlowConditionToken
= DACtx
->forkFlowCondition(FlowConditionToken
);
481 bool Environment::canDescend(unsigned MaxDepth
,
482 const DeclContext
*Callee
) const {
483 return CallStack
.size() <= MaxDepth
&& !llvm::is_contained(CallStack
, Callee
);
486 Environment
Environment::pushCall(const CallExpr
*Call
) const {
487 Environment
Env(*this);
489 if (const auto *MethodCall
= dyn_cast
<CXXMemberCallExpr
>(Call
)) {
490 if (const Expr
*Arg
= MethodCall
->getImplicitObjectArgument()) {
491 if (!isa
<CXXThisExpr
>(Arg
))
493 cast
<RecordStorageLocation
>(getStorageLocation(*Arg
));
494 // Otherwise (when the argument is `this`), retain the current
495 // environment's `ThisPointeeLoc`.
499 Env
.pushCallInternal(Call
->getDirectCallee(),
500 llvm::ArrayRef(Call
->getArgs(), Call
->getNumArgs()));
505 Environment
Environment::pushCall(const CXXConstructExpr
*Call
) const {
506 Environment
Env(*this);
508 Env
.ThisPointeeLoc
= &Env
.getResultObjectLocation(*Call
);
510 Env
.pushCallInternal(Call
->getConstructor(),
511 llvm::ArrayRef(Call
->getArgs(), Call
->getNumArgs()));
516 void Environment::pushCallInternal(const FunctionDecl
*FuncDecl
,
517 ArrayRef
<const Expr
*> Args
) {
518 // Canonicalize to the definition of the function. This ensures that we're
519 // putting arguments into the same `ParamVarDecl`s` that the callee will later
520 // be retrieving them from.
521 assert(FuncDecl
->getDefinition() != nullptr);
522 FuncDecl
= FuncDecl
->getDefinition();
524 CallStack
.push_back(FuncDecl
);
526 initFieldsGlobalsAndFuncs(FuncDecl
);
528 const auto *ParamIt
= FuncDecl
->param_begin();
530 // FIXME: Parameters don't always map to arguments 1:1; examples include
531 // overloaded operators implemented as member functions, and parameter packs.
532 for (unsigned ArgIndex
= 0; ArgIndex
< Args
.size(); ++ParamIt
, ++ArgIndex
) {
533 assert(ParamIt
!= FuncDecl
->param_end());
534 const VarDecl
*Param
= *ParamIt
;
535 setStorageLocation(*Param
, createObject(*Param
, Args
[ArgIndex
]));
539 void Environment::popCall(const CallExpr
*Call
, const Environment
&CalleeEnv
) {
540 // We ignore some entries of `CalleeEnv`:
541 // - `DACtx` because is already the same in both
542 // - We don't want the callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or
543 // `ThisPointeeLoc` because they don't apply to us.
544 // - `DeclToLoc`, `ExprToLoc`, and `ExprToVal` capture information from the
545 // callee's local scope, so when popping that scope, we do not propagate
547 this->LocToVal
= std::move(CalleeEnv
.LocToVal
);
548 this->FlowConditionToken
= std::move(CalleeEnv
.FlowConditionToken
);
550 if (Call
->isGLValue()) {
551 if (CalleeEnv
.ReturnLoc
!= nullptr)
552 setStorageLocation(*Call
, *CalleeEnv
.ReturnLoc
);
553 } else if (!Call
->getType()->isVoidType()) {
554 if (CalleeEnv
.ReturnVal
!= nullptr)
555 setValue(*Call
, *CalleeEnv
.ReturnVal
);
559 void Environment::popCall(const CXXConstructExpr
*Call
,
560 const Environment
&CalleeEnv
) {
561 // See also comment in `popCall(const CallExpr *, const Environment &)` above.
562 this->LocToVal
= std::move(CalleeEnv
.LocToVal
);
563 this->FlowConditionToken
= std::move(CalleeEnv
.FlowConditionToken
);
565 if (Value
*Val
= CalleeEnv
.getValue(*CalleeEnv
.ThisPointeeLoc
)) {
566 setValue(*Call
, *Val
);
570 bool Environment::equivalentTo(const Environment
&Other
,
571 Environment::ValueModel
&Model
) const {
572 assert(DACtx
== Other
.DACtx
);
574 if (ReturnVal
!= Other
.ReturnVal
)
577 if (ReturnLoc
!= Other
.ReturnLoc
)
580 if (ThisPointeeLoc
!= Other
.ThisPointeeLoc
)
583 if (DeclToLoc
!= Other
.DeclToLoc
)
586 if (ExprToLoc
!= Other
.ExprToLoc
)
589 if (!compareKeyToValueMaps(ExprToVal
, Other
.ExprToVal
, *this, Other
, Model
))
592 if (!compareKeyToValueMaps(LocToVal
, Other
.LocToVal
, *this, Other
, Model
))
598 LatticeJoinEffect
Environment::widen(const Environment
&PrevEnv
,
599 Environment::ValueModel
&Model
) {
600 assert(DACtx
== PrevEnv
.DACtx
);
601 assert(ReturnVal
== PrevEnv
.ReturnVal
);
602 assert(ReturnLoc
== PrevEnv
.ReturnLoc
);
603 assert(ThisPointeeLoc
== PrevEnv
.ThisPointeeLoc
);
604 assert(CallStack
== PrevEnv
.CallStack
);
606 auto Effect
= LatticeJoinEffect::Unchanged
;
608 // By the API, `PrevEnv` is a previous version of the environment for the same
609 // block, so we have some guarantees about its shape. In particular, it will
610 // be the result of a join or widen operation on previous values for this
611 // block. For `DeclToLoc`, `ExprToVal`, and `ExprToLoc`, join guarantees that
612 // these maps are subsets of the maps in `PrevEnv`. So, as long as we maintain
613 // this property here, we don't need change their current values to widen.
614 assert(DeclToLoc
.size() <= PrevEnv
.DeclToLoc
.size());
615 assert(ExprToVal
.size() <= PrevEnv
.ExprToVal
.size());
616 assert(ExprToLoc
.size() <= PrevEnv
.ExprToLoc
.size());
618 ExprToVal
= widenKeyToValueMap(ExprToVal
, PrevEnv
.ExprToVal
, *this, PrevEnv
,
621 LocToVal
= widenKeyToValueMap(LocToVal
, PrevEnv
.LocToVal
, *this, PrevEnv
,
623 if (DeclToLoc
.size() != PrevEnv
.DeclToLoc
.size() ||
624 ExprToLoc
.size() != PrevEnv
.ExprToLoc
.size() ||
625 ExprToVal
.size() != PrevEnv
.ExprToVal
.size() ||
626 LocToVal
.size() != PrevEnv
.LocToVal
.size())
627 Effect
= LatticeJoinEffect::Changed
;
632 Environment
Environment::join(const Environment
&EnvA
, const Environment
&EnvB
,
633 Environment::ValueModel
&Model
) {
634 assert(EnvA
.DACtx
== EnvB
.DACtx
);
635 assert(EnvA
.ThisPointeeLoc
== EnvB
.ThisPointeeLoc
);
636 assert(EnvA
.CallStack
== EnvB
.CallStack
);
638 Environment
JoinedEnv(*EnvA
.DACtx
);
640 JoinedEnv
.CallStack
= EnvA
.CallStack
;
641 JoinedEnv
.ThisPointeeLoc
= EnvA
.ThisPointeeLoc
;
643 if (EnvA
.ReturnVal
== nullptr || EnvB
.ReturnVal
== nullptr) {
644 // `ReturnVal` might not always get set -- for example if we have a return
645 // statement of the form `return some_other_func()` and we decide not to
646 // analyze `some_other_func()`.
647 // In this case, we can't say anything about the joined return value -- we
648 // don't simply want to propagate the return value that we do have, because
649 // it might not be the correct one.
650 // This occurs for example in the test `ContextSensitiveMutualRecursion`.
651 JoinedEnv
.ReturnVal
= nullptr;
652 } else if (areEquivalentValues(*EnvA
.ReturnVal
, *EnvB
.ReturnVal
)) {
653 JoinedEnv
.ReturnVal
= EnvA
.ReturnVal
;
655 assert(!EnvA
.CallStack
.empty());
656 // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this
658 auto *Func
= dyn_cast
<FunctionDecl
>(EnvA
.CallStack
.back());
659 assert(Func
!= nullptr);
660 if (Value
*MergedVal
=
661 mergeDistinctValues(Func
->getReturnType(), *EnvA
.ReturnVal
, EnvA
,
662 *EnvB
.ReturnVal
, EnvB
, JoinedEnv
, Model
))
663 JoinedEnv
.ReturnVal
= MergedVal
;
666 if (EnvA
.ReturnLoc
== EnvB
.ReturnLoc
)
667 JoinedEnv
.ReturnLoc
= EnvA
.ReturnLoc
;
669 JoinedEnv
.ReturnLoc
= nullptr;
671 JoinedEnv
.DeclToLoc
= intersectDeclToLoc(EnvA
.DeclToLoc
, EnvB
.DeclToLoc
);
673 // FIXME: update join to detect backedges and simplify the flow condition
675 JoinedEnv
.FlowConditionToken
= EnvA
.DACtx
->joinFlowConditions(
676 EnvA
.FlowConditionToken
, EnvB
.FlowConditionToken
);
679 joinLocToVal(EnvA
.LocToVal
, EnvB
.LocToVal
, EnvA
, EnvB
, JoinedEnv
, Model
);
681 // We intentionally leave `JoinedEnv.ExprToLoc` and `JoinedEnv.ExprToVal`
682 // empty, as we never need to access entries in these maps outside of the
683 // basic block that sets them.
688 StorageLocation
&Environment::createStorageLocation(QualType Type
) {
689 return DACtx
->createStorageLocation(Type
);
692 StorageLocation
&Environment::createStorageLocation(const ValueDecl
&D
) {
693 // Evaluated declarations are always assigned the same storage locations to
694 // ensure that the environment stabilizes across loop iterations. Storage
695 // locations for evaluated declarations are stored in the analysis context.
696 return DACtx
->getStableStorageLocation(D
);
699 StorageLocation
&Environment::createStorageLocation(const Expr
&E
) {
700 // Evaluated expressions are always assigned the same storage locations to
701 // ensure that the environment stabilizes across loop iterations. Storage
702 // locations for evaluated expressions are stored in the analysis context.
703 return DACtx
->getStableStorageLocation(E
);
706 void Environment::setStorageLocation(const ValueDecl
&D
, StorageLocation
&Loc
) {
707 assert(!DeclToLoc
.contains(&D
));
708 DeclToLoc
[&D
] = &Loc
;
711 StorageLocation
*Environment::getStorageLocation(const ValueDecl
&D
) const {
712 auto It
= DeclToLoc
.find(&D
);
713 if (It
== DeclToLoc
.end())
716 StorageLocation
*Loc
= It
->second
;
721 void Environment::removeDecl(const ValueDecl
&D
) { DeclToLoc
.erase(&D
); }
723 void Environment::setStorageLocation(const Expr
&E
, StorageLocation
&Loc
) {
724 // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason,
725 // but we still want to be able to associate a `StorageLocation` with them,
726 // so allow these as an exception.
727 assert(E
.isGLValue() ||
728 E
.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn
));
729 const Expr
&CanonE
= ignoreCFGOmittedNodes(E
);
730 assert(!ExprToLoc
.contains(&CanonE
));
731 ExprToLoc
[&CanonE
] = &Loc
;
734 StorageLocation
*Environment::getStorageLocation(const Expr
&E
) const {
735 // See comment in `setStorageLocation()`.
736 assert(E
.isGLValue() ||
737 E
.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn
));
738 auto It
= ExprToLoc
.find(&ignoreCFGOmittedNodes(E
));
739 return It
== ExprToLoc
.end() ? nullptr : &*It
->second
;
742 // Returns whether a prvalue of record type is the one that originally
743 // constructs the object (i.e. it doesn't propagate it from one of its
745 static bool isOriginalRecordConstructor(const Expr
&RecordPRValue
) {
746 if (auto *Init
= dyn_cast
<InitListExpr
>(&RecordPRValue
))
747 return !Init
->isSemanticForm() || !Init
->isTransparent();
748 return isa
<CXXConstructExpr
>(RecordPRValue
) || isa
<CallExpr
>(RecordPRValue
) ||
749 isa
<LambdaExpr
>(RecordPRValue
) ||
750 // The framework currently does not propagate the objects created in
751 // the two branches of a `ConditionalOperator` because there is no way
752 // to reconcile their storage locations, which are different. We
753 // therefore claim that the `ConditionalOperator` is the expression
754 // that originally constructs the object.
755 // Ultimately, this will be fixed by propagating locations down from
756 // the result object, rather than up from the original constructor as
757 // we do now (see also the FIXME in the documentation for
758 // `getResultObjectLocation()`).
759 isa
<ConditionalOperator
>(RecordPRValue
);
762 RecordStorageLocation
&
763 Environment::getResultObjectLocation(const Expr
&RecordPRValue
) const {
764 assert(RecordPRValue
.getType()->isRecordType());
765 assert(RecordPRValue
.isPRValue());
767 // Returns a storage location that we can use if assertions fail.
768 auto FallbackForAssertFailure
=
769 [this, &RecordPRValue
]() -> RecordStorageLocation
& {
770 return cast
<RecordStorageLocation
>(
771 DACtx
->getStableStorageLocation(RecordPRValue
));
774 if (isOriginalRecordConstructor(RecordPRValue
)) {
775 auto *Val
= cast_or_null
<RecordValue
>(getValue(RecordPRValue
));
776 // The builtin transfer function should have created a `RecordValue` for all
777 // original record constructors.
780 return FallbackForAssertFailure();
781 return Val
->getLoc();
784 // Expression nodes that propagate a record prvalue should have exactly one
786 llvm::SmallVector
<const Stmt
*> children(RecordPRValue
.child_begin(),
787 RecordPRValue
.child_end());
788 assert(children
.size() == 1);
789 if (children
.empty())
790 return FallbackForAssertFailure();
792 return getResultObjectLocation(*cast
<Expr
>(children
[0]));
795 PointerValue
&Environment::getOrCreateNullPointerValue(QualType PointeeType
) {
796 return DACtx
->getOrCreateNullPointerValue(PointeeType
);
799 void Environment::setValue(const StorageLocation
&Loc
, Value
&Val
) {
800 assert(!isa
<RecordValue
>(&Val
) || &cast
<RecordValue
>(&Val
)->getLoc() == &Loc
);
802 LocToVal
[&Loc
] = &Val
;
805 void Environment::setValue(const Expr
&E
, Value
&Val
) {
806 if (auto *RecordVal
= dyn_cast
<RecordValue
>(&Val
)) {
807 assert(isOriginalRecordConstructor(E
) ||
808 &RecordVal
->getLoc() == &getResultObjectLocation(E
));
811 assert(E
.isPRValue());
812 ExprToVal
[&E
] = &Val
;
815 Value
*Environment::getValue(const StorageLocation
&Loc
) const {
816 return LocToVal
.lookup(&Loc
);
819 Value
*Environment::getValue(const ValueDecl
&D
) const {
820 auto *Loc
= getStorageLocation(D
);
823 return getValue(*Loc
);
826 Value
*Environment::getValue(const Expr
&E
) const {
828 auto It
= ExprToVal
.find(&ignoreCFGOmittedNodes(E
));
829 return It
== ExprToVal
.end() ? nullptr : It
->second
;
832 auto It
= ExprToLoc
.find(&ignoreCFGOmittedNodes(E
));
833 if (It
== ExprToLoc
.end())
835 return getValue(*It
->second
);
838 Value
*Environment::createValue(QualType Type
) {
839 llvm::DenseSet
<QualType
> Visited
;
840 int CreatedValuesCount
= 0;
841 Value
*Val
= createValueUnlessSelfReferential(Type
, Visited
, /*Depth=*/0,
843 if (CreatedValuesCount
> MaxCompositeValueSize
) {
844 llvm::errs() << "Attempting to initialize a huge value of type: " << Type
850 Value
*Environment::createValueUnlessSelfReferential(
851 QualType Type
, llvm::DenseSet
<QualType
> &Visited
, int Depth
,
852 int &CreatedValuesCount
) {
853 assert(!Type
.isNull());
854 assert(!Type
->isReferenceType());
856 // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
857 if ((Depth
> 1 && CreatedValuesCount
> MaxCompositeValueSize
) ||
858 Depth
> MaxCompositeValueDepth
)
861 if (Type
->isBooleanType()) {
862 CreatedValuesCount
++;
863 return &makeAtomicBoolValue();
866 if (Type
->isIntegerType()) {
867 // FIXME: consider instead `return nullptr`, given that we do nothing useful
868 // with integers, and so distinguishing them serves no purpose, but could
869 // prevent convergence.
870 CreatedValuesCount
++;
871 return &arena().create
<IntegerValue
>();
874 if (Type
->isPointerType()) {
875 CreatedValuesCount
++;
876 QualType PointeeType
= Type
->getPointeeType();
877 StorageLocation
&PointeeLoc
=
878 createLocAndMaybeValue(PointeeType
, Visited
, Depth
, CreatedValuesCount
);
880 return &arena().create
<PointerValue
>(PointeeLoc
);
883 if (Type
->isRecordType()) {
884 CreatedValuesCount
++;
885 llvm::DenseMap
<const ValueDecl
*, StorageLocation
*> FieldLocs
;
886 for (const FieldDecl
*Field
: DACtx
->getModeledFields(Type
)) {
887 assert(Field
!= nullptr);
889 QualType FieldType
= Field
->getType();
892 {Field
, &createLocAndMaybeValue(FieldType
, Visited
, Depth
+ 1,
893 CreatedValuesCount
)});
896 RecordStorageLocation::SyntheticFieldMap SyntheticFieldLocs
;
897 for (const auto &Entry
: DACtx
->getSyntheticFields(Type
)) {
898 SyntheticFieldLocs
.insert(
900 &createLocAndMaybeValue(Entry
.getValue(), Visited
, Depth
+ 1,
901 CreatedValuesCount
)});
904 RecordStorageLocation
&Loc
= DACtx
->createRecordStorageLocation(
905 Type
, std::move(FieldLocs
), std::move(SyntheticFieldLocs
));
906 RecordValue
&RecordVal
= create
<RecordValue
>(Loc
);
908 // As we already have a storage location for the `RecordValue`, we can and
909 // should associate them in the environment.
910 setValue(Loc
, RecordVal
);
919 Environment::createLocAndMaybeValue(QualType Ty
,
920 llvm::DenseSet
<QualType
> &Visited
,
921 int Depth
, int &CreatedValuesCount
) {
922 if (!Visited
.insert(Ty
.getCanonicalType()).second
)
923 return createStorageLocation(Ty
.getNonReferenceType());
924 Value
*Val
= createValueUnlessSelfReferential(
925 Ty
.getNonReferenceType(), Visited
, Depth
, CreatedValuesCount
);
926 Visited
.erase(Ty
.getCanonicalType());
928 Ty
= Ty
.getNonReferenceType();
931 return createStorageLocation(Ty
);
933 if (Ty
->isRecordType())
934 return cast
<RecordValue
>(Val
)->getLoc();
936 StorageLocation
&Loc
= createStorageLocation(Ty
);
941 StorageLocation
&Environment::createObjectInternal(const ValueDecl
*D
,
943 const Expr
*InitExpr
) {
944 if (Ty
->isReferenceType()) {
945 // Although variables of reference type always need to be initialized, it
946 // can happen that we can't see the initializer, so `InitExpr` may still
949 if (auto *InitExprLoc
= getStorageLocation(*InitExpr
))
953 // Even though we have an initializer, we might not get an
954 // InitExprLoc, for example if the InitExpr is a CallExpr for which we
955 // don't have a function body. In this case, we just invent a storage
956 // location and value -- it's the best we can do.
957 return createObjectInternal(D
, Ty
.getNonReferenceType(), nullptr);
960 Value
*Val
= nullptr;
962 // In the (few) cases where an expression is intentionally
963 // "uninterpreted", `InitExpr` is not associated with a value. There are
964 // two ways to handle this situation: propagate the status, so that
965 // uninterpreted initializers result in uninterpreted variables, or
966 // provide a default value. We choose the latter so that later refinements
967 // of the variable can be used for reasoning about the surrounding code.
968 // For this reason, we let this case be handled by the `createValue()`
971 // FIXME. If and when we interpret all language cases, change this to
972 // assert that `InitExpr` is interpreted, rather than supplying a
973 // default value (assuming we don't update the environment API to return
975 Val
= getValue(*InitExpr
);
977 Val
= createValue(Ty
);
979 if (Ty
->isRecordType())
980 return cast
<RecordValue
>(Val
)->getLoc();
982 StorageLocation
&Loc
=
983 D
? createStorageLocation(*D
) : createStorageLocation(Ty
);
991 void Environment::assume(const Formula
&F
) {
992 DACtx
->addFlowConditionConstraint(FlowConditionToken
, F
);
995 bool Environment::proves(const Formula
&F
) const {
996 return DACtx
->flowConditionImplies(FlowConditionToken
, F
);
999 bool Environment::allows(const Formula
&F
) const {
1000 return DACtx
->flowConditionAllows(FlowConditionToken
, F
);
1003 void Environment::dump(raw_ostream
&OS
) const {
1004 // FIXME: add printing for remaining fields and allow caller to decide what
1005 // fields are printed.
1006 OS
<< "DeclToLoc:\n";
1007 for (auto [D
, L
] : DeclToLoc
)
1008 OS
<< " [" << D
->getNameAsString() << ", " << L
<< "]\n";
1010 OS
<< "ExprToLoc:\n";
1011 for (auto [E
, L
] : ExprToLoc
)
1012 OS
<< " [" << E
<< ", " << L
<< "]\n";
1014 OS
<< "ExprToVal:\n";
1015 for (auto [E
, V
] : ExprToVal
)
1016 OS
<< " [" << E
<< ", " << V
<< ": " << *V
<< "]\n";
1018 OS
<< "LocToVal:\n";
1019 for (auto [L
, V
] : LocToVal
) {
1020 OS
<< " [" << L
<< ", " << V
<< ": " << *V
<< "]\n";
1024 DACtx
->dumpFlowCondition(FlowConditionToken
, OS
);
1027 void Environment::dump() const {
1031 RecordStorageLocation
*getImplicitObjectLocation(const CXXMemberCallExpr
&MCE
,
1032 const Environment
&Env
) {
1033 Expr
*ImplicitObject
= MCE
.getImplicitObjectArgument();
1034 if (ImplicitObject
== nullptr)
1036 if (ImplicitObject
->getType()->isPointerType()) {
1037 if (auto *Val
= Env
.get
<PointerValue
>(*ImplicitObject
))
1038 return &cast
<RecordStorageLocation
>(Val
->getPointeeLoc());
1041 return cast_or_null
<RecordStorageLocation
>(
1042 Env
.getStorageLocation(*ImplicitObject
));
1045 RecordStorageLocation
*getBaseObjectLocation(const MemberExpr
&ME
,
1046 const Environment
&Env
) {
1047 Expr
*Base
= ME
.getBase();
1048 if (Base
== nullptr)
1051 if (auto *Val
= Env
.get
<PointerValue
>(*Base
))
1052 return &cast
<RecordStorageLocation
>(Val
->getPointeeLoc());
1055 return Env
.get
<RecordStorageLocation
>(*Base
);
1058 std::vector
<FieldDecl
*> getFieldsForInitListExpr(const RecordDecl
*RD
) {
1059 // Unnamed bitfields are only used for padding and do not appear in
1060 // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s
1061 // field list, and we thus need to remove them before mapping inits to
1062 // fields to avoid mapping inits to the wrongs fields.
1063 std::vector
<FieldDecl
*> Fields
;
1065 RD
->fields(), std::back_inserter(Fields
),
1066 [](const FieldDecl
*Field
) { return !Field
->isUnnamedBitfield(); });
1070 RecordValue
&refreshRecordValue(RecordStorageLocation
&Loc
, Environment
&Env
) {
1071 auto &NewVal
= Env
.create
<RecordValue
>(Loc
);
1072 Env
.setValue(Loc
, NewVal
);
1076 RecordValue
&refreshRecordValue(const Expr
&Expr
, Environment
&Env
) {
1077 assert(Expr
.getType()->isRecordType());
1079 if (Expr
.isPRValue()) {
1080 if (auto *ExistingVal
= Env
.get
<RecordValue
>(Expr
)) {
1081 auto &NewVal
= Env
.create
<RecordValue
>(ExistingVal
->getLoc());
1082 Env
.setValue(Expr
, NewVal
);
1083 Env
.setValue(NewVal
.getLoc(), NewVal
);
1087 auto &NewVal
= *cast
<RecordValue
>(Env
.createValue(Expr
.getType()));
1088 Env
.setValue(Expr
, NewVal
);
1092 if (auto *Loc
= Env
.get
<RecordStorageLocation
>(Expr
)) {
1093 auto &NewVal
= Env
.create
<RecordValue
>(*Loc
);
1094 Env
.setValue(*Loc
, NewVal
);
1098 auto &NewVal
= *cast
<RecordValue
>(Env
.createValue(Expr
.getType()));
1099 Env
.setStorageLocation(Expr
, NewVal
.getLoc());
1103 } // namespace dataflow
1104 } // namespace clang