1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines a DataflowAnalysisContext class that owns objects that
10 // encompass the state of a program and stores context that is used during
13 //===----------------------------------------------------------------------===//
15 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/Analysis/FlowSensitive/DebugSupport.h"
18 #include "clang/Analysis/FlowSensitive/Formula.h"
19 #include "clang/Analysis/FlowSensitive/Logger.h"
20 #include "clang/Analysis/FlowSensitive/SimplifyConstraints.h"
21 #include "clang/Analysis/FlowSensitive/Value.h"
22 #include "llvm/ADT/SetOperations.h"
23 #include "llvm/ADT/SetVector.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/Path.h"
28 #include "llvm/Support/raw_ostream.h"
35 static llvm::cl::opt
<std::string
> DataflowLog(
36 "dataflow-log", llvm::cl::Hidden
, llvm::cl::ValueOptional
,
37 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
38 "log to stderr. With an arg, writes HTML logs under the "
39 "specified directory (one per analyzed function)."));
44 FieldSet
DataflowAnalysisContext::getModeledFields(QualType Type
) {
45 // During context-sensitive analysis, a struct may be allocated in one
46 // function, but its field accessed in a function lower in the stack than
47 // the allocation. Since we only collect fields used in the function where
48 // the allocation occurs, we can't apply that filter when performing
49 // context-sensitive analysis. But, this only applies to storage locations,
50 // since field access it not allowed to fail. In contrast, field *values*
51 // don't need this allowance, since the API allows for uninitialized fields.
52 if (Opts
.ContextSensitiveOpts
)
53 return getObjectFields(Type
);
55 return llvm::set_intersection(getObjectFields(Type
), ModeledFields
);
58 void DataflowAnalysisContext::addModeledFields(const FieldSet
&Fields
) {
59 ModeledFields
.set_union(Fields
);
62 StorageLocation
&DataflowAnalysisContext::createStorageLocation(QualType Type
) {
63 if (!Type
.isNull() && Type
->isRecordType()) {
64 llvm::DenseMap
<const ValueDecl
*, StorageLocation
*> FieldLocs
;
65 for (const FieldDecl
*Field
: getModeledFields(Type
))
66 if (Field
->getType()->isReferenceType())
67 FieldLocs
.insert({Field
, nullptr});
69 FieldLocs
.insert({Field
, &createStorageLocation(
70 Field
->getType().getNonReferenceType())});
72 RecordStorageLocation::SyntheticFieldMap SyntheticFields
;
73 for (const auto &Entry
: getSyntheticFields(Type
))
74 SyntheticFields
.insert(
76 &createStorageLocation(Entry
.getValue().getNonReferenceType())});
78 return createRecordStorageLocation(Type
, std::move(FieldLocs
),
79 std::move(SyntheticFields
));
81 return arena().create
<ScalarStorageLocation
>(Type
);
84 // Returns the keys for a given `StringMap`.
85 // Can't use `StringSet` as the return type as it doesn't support `operator==`.
87 static llvm::DenseSet
<llvm::StringRef
> getKeys(const llvm::StringMap
<T
> &Map
) {
88 return llvm::DenseSet
<llvm::StringRef
>(Map
.keys().begin(), Map
.keys().end());
91 RecordStorageLocation
&DataflowAnalysisContext::createRecordStorageLocation(
92 QualType Type
, RecordStorageLocation::FieldToLoc FieldLocs
,
93 RecordStorageLocation::SyntheticFieldMap SyntheticFields
) {
94 assert(Type
->isRecordType());
95 assert(containsSameFields(getModeledFields(Type
), FieldLocs
));
96 assert(getKeys(getSyntheticFields(Type
)) == getKeys(SyntheticFields
));
98 RecordStorageLocationCreated
= true;
99 return arena().create
<RecordStorageLocation
>(Type
, std::move(FieldLocs
),
100 std::move(SyntheticFields
));
104 DataflowAnalysisContext::getStableStorageLocation(const ValueDecl
&D
) {
105 if (auto *Loc
= DeclToLoc
.lookup(&D
))
107 auto &Loc
= createStorageLocation(D
.getType().getNonReferenceType());
108 DeclToLoc
[&D
] = &Loc
;
113 DataflowAnalysisContext::getStableStorageLocation(const Expr
&E
) {
114 const Expr
&CanonE
= ignoreCFGOmittedNodes(E
);
116 if (auto *Loc
= ExprToLoc
.lookup(&CanonE
))
118 auto &Loc
= createStorageLocation(CanonE
.getType());
119 ExprToLoc
[&CanonE
] = &Loc
;
124 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType
) {
125 auto CanonicalPointeeType
=
126 PointeeType
.isNull() ? PointeeType
: PointeeType
.getCanonicalType();
127 auto Res
= NullPointerVals
.try_emplace(CanonicalPointeeType
, nullptr);
129 auto &PointeeLoc
= createStorageLocation(CanonicalPointeeType
);
130 Res
.first
->second
= &arena().create
<PointerValue
>(PointeeLoc
);
132 return *Res
.first
->second
;
135 void DataflowAnalysisContext::addInvariant(const Formula
&Constraint
) {
136 if (Invariant
== nullptr)
137 Invariant
= &Constraint
;
139 Invariant
= &arena().makeAnd(*Invariant
, Constraint
);
142 void DataflowAnalysisContext::addFlowConditionConstraint(
143 Atom Token
, const Formula
&Constraint
) {
144 auto Res
= FlowConditionConstraints
.try_emplace(Token
, &Constraint
);
147 &arena().makeAnd(*Res
.first
->second
, Constraint
);
151 Atom
DataflowAnalysisContext::forkFlowCondition(Atom Token
) {
152 Atom ForkToken
= arena().makeFlowConditionToken();
153 FlowConditionDeps
[ForkToken
].insert(Token
);
154 addFlowConditionConstraint(ForkToken
, arena().makeAtomRef(Token
));
159 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken
,
161 Atom Token
= arena().makeFlowConditionToken();
162 FlowConditionDeps
[Token
].insert(FirstToken
);
163 FlowConditionDeps
[Token
].insert(SecondToken
);
164 addFlowConditionConstraint(Token
,
165 arena().makeOr(arena().makeAtomRef(FirstToken
),
166 arena().makeAtomRef(SecondToken
)));
170 Solver::Result
DataflowAnalysisContext::querySolver(
171 llvm::SetVector
<const Formula
*> Constraints
) {
172 return S
->solve(Constraints
.getArrayRef());
175 bool DataflowAnalysisContext::flowConditionImplies(Atom Token
,
177 // Returns true if and only if truth assignment of the flow condition implies
178 // that `F` is also true. We prove whether or not this property holds by
179 // reducing the problem to satisfiability checking. In other words, we attempt
180 // to show that assuming `F` is false makes the constraints induced by the
181 // flow condition unsatisfiable.
182 llvm::SetVector
<const Formula
*> Constraints
;
183 Constraints
.insert(&arena().makeAtomRef(Token
));
184 Constraints
.insert(&arena().makeNot(F
));
185 addTransitiveFlowConditionConstraints(Token
, Constraints
);
186 return isUnsatisfiable(std::move(Constraints
));
189 bool DataflowAnalysisContext::flowConditionAllows(Atom Token
,
191 llvm::SetVector
<const Formula
*> Constraints
;
192 Constraints
.insert(&arena().makeAtomRef(Token
));
193 Constraints
.insert(&F
);
194 addTransitiveFlowConditionConstraints(Token
, Constraints
);
195 return isSatisfiable(std::move(Constraints
));
198 bool DataflowAnalysisContext::equivalentFormulas(const Formula
&Val1
,
199 const Formula
&Val2
) {
200 llvm::SetVector
<const Formula
*> Constraints
;
201 Constraints
.insert(&arena().makeNot(arena().makeEquals(Val1
, Val2
)));
202 return isUnsatisfiable(std::move(Constraints
));
205 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
206 Atom Token
, llvm::SetVector
<const Formula
*> &Constraints
) {
207 llvm::DenseSet
<Atom
> AddedTokens
;
208 std::vector
<Atom
> Remaining
= {Token
};
211 Constraints
.insert(Invariant
);
212 // Define all the flow conditions that might be referenced in constraints.
213 while (!Remaining
.empty()) {
214 auto Token
= Remaining
.back();
215 Remaining
.pop_back();
216 if (!AddedTokens
.insert(Token
).second
)
219 auto ConstraintsIt
= FlowConditionConstraints
.find(Token
);
220 if (ConstraintsIt
== FlowConditionConstraints
.end()) {
221 Constraints
.insert(&arena().makeAtomRef(Token
));
223 // Bind flow condition token via `iff` to its set of constraints:
224 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
225 Constraints
.insert(&arena().makeEquals(arena().makeAtomRef(Token
),
226 *ConstraintsIt
->second
));
229 if (auto DepsIt
= FlowConditionDeps
.find(Token
);
230 DepsIt
!= FlowConditionDeps
.end())
231 for (Atom A
: DepsIt
->second
)
232 Remaining
.push_back(A
);
236 static void printAtomList(const llvm::SmallVector
<Atom
> &Atoms
,
237 llvm::raw_ostream
&OS
) {
239 for (size_t i
= 0; i
< Atoms
.size(); ++i
) {
241 if (i
+ 1 < Atoms
.size())
247 void DataflowAnalysisContext::dumpFlowCondition(Atom Token
,
248 llvm::raw_ostream
&OS
) {
249 llvm::SetVector
<const Formula
*> Constraints
;
250 Constraints
.insert(&arena().makeAtomRef(Token
));
251 addTransitiveFlowConditionConstraints(Token
, Constraints
);
253 OS
<< "Flow condition token: " << Token
<< "\n";
254 SimplifyConstraintsInfo Info
;
255 llvm::SetVector
<const Formula
*> OriginalConstraints
= Constraints
;
256 simplifyConstraints(Constraints
, arena(), &Info
);
257 if (!Constraints
.empty()) {
258 OS
<< "Constraints:\n";
259 for (const auto *Constraint
: Constraints
) {
260 Constraint
->print(OS
);
264 if (!Info
.TrueAtoms
.empty()) {
265 OS
<< "True atoms: ";
266 printAtomList(Info
.TrueAtoms
, OS
);
268 if (!Info
.FalseAtoms
.empty()) {
269 OS
<< "False atoms: ";
270 printAtomList(Info
.FalseAtoms
, OS
);
272 if (!Info
.EquivalentAtoms
.empty()) {
273 OS
<< "Equivalent atoms:\n";
274 for (const llvm::SmallVector
<Atom
> &Class
: Info
.EquivalentAtoms
)
275 printAtomList(Class
, OS
);
278 OS
<< "\nFlow condition constraints before simplification:\n";
279 for (const auto *Constraint
: OriginalConstraints
) {
280 Constraint
->print(OS
);
285 const ControlFlowContext
*
286 DataflowAnalysisContext::getControlFlowContext(const FunctionDecl
*F
) {
287 // Canonicalize the key:
288 F
= F
->getDefinition();
291 auto It
= FunctionContexts
.find(F
);
292 if (It
!= FunctionContexts
.end())
296 auto CFCtx
= ControlFlowContext::build(*F
);
297 // FIXME: Handle errors.
299 auto Result
= FunctionContexts
.insert({F
, std::move(*CFCtx
)});
300 return &Result
.first
->second
;
306 static std::unique_ptr
<Logger
> makeLoggerFromCommandLine() {
307 if (DataflowLog
.empty())
308 return Logger::textual(llvm::errs());
310 llvm::StringRef Dir
= DataflowLog
;
311 if (auto EC
= llvm::sys::fs::create_directories(Dir
))
312 llvm::errs() << "Failed to create log dir: " << EC
.message() << "\n";
313 // All analysis runs within a process will log to the same directory.
314 // Share a counter so they don't all overwrite each other's 0.html.
315 // (Don't share a logger, it's not threadsafe).
316 static std::atomic
<unsigned> Counter
= {0};
318 [Dir(Dir
.str())]() mutable -> std::unique_ptr
<llvm::raw_ostream
> {
319 llvm::SmallString
<256> File(Dir
);
320 llvm::sys::path::append(File
,
321 std::to_string(Counter
.fetch_add(1)) + ".html");
323 auto OS
= std::make_unique
<llvm::raw_fd_ostream
>(File
, EC
);
325 llvm::errs() << "Failed to create log " << File
<< ": " << EC
.message()
327 return std::make_unique
<llvm::raw_null_ostream
>();
331 return Logger::html(std::move(StreamFactory
));
334 DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr
<Solver
> S
,
336 : S(std::move(S
)), A(std::make_unique
<Arena
>()), Opts(Opts
) {
337 assert(this->S
!= nullptr);
338 // If the -dataflow-log command-line flag was set, synthesize a logger.
339 // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
341 if (Opts
.Log
== nullptr) {
342 if (DataflowLog
.getNumOccurrences() > 0) {
343 LogOwner
= makeLoggerFromCommandLine();
344 this->Opts
.Log
= LogOwner
.get();
345 // FIXME: if the flag is given a value, write an HTML log to a file.
347 this->Opts
.Log
= &Logger::null();
352 DataflowAnalysisContext::~DataflowAnalysisContext() = default;
354 } // namespace dataflow
357 using namespace clang
;
359 const Expr
&clang::dataflow::ignoreCFGOmittedNodes(const Expr
&E
) {
360 const Expr
*Current
= &E
;
361 if (auto *EWC
= dyn_cast
<ExprWithCleanups
>(Current
)) {
362 Current
= EWC
->getSubExpr();
363 assert(Current
!= nullptr);
365 Current
= Current
->IgnoreParens();
366 assert(Current
!= nullptr);
370 const Stmt
&clang::dataflow::ignoreCFGOmittedNodes(const Stmt
&S
) {
371 if (auto *E
= dyn_cast
<Expr
>(&S
))
372 return ignoreCFGOmittedNodes(*E
);
376 // FIXME: Does not precisely handle non-virtual diamond inheritance. A single
377 // field decl will be modeled for all instances of the inherited field.
378 static void getFieldsFromClassHierarchy(QualType Type
,
379 clang::dataflow::FieldSet
&Fields
) {
380 if (Type
->isIncompleteType() || Type
->isDependentType() ||
381 !Type
->isRecordType())
384 for (const FieldDecl
*Field
: Type
->getAsRecordDecl()->fields())
385 Fields
.insert(Field
);
386 if (auto *CXXRecord
= Type
->getAsCXXRecordDecl())
387 for (const CXXBaseSpecifier
&Base
: CXXRecord
->bases())
388 getFieldsFromClassHierarchy(Base
.getType(), Fields
);
391 /// Gets the set of all fields in the type.
392 clang::dataflow::FieldSet
clang::dataflow::getObjectFields(QualType Type
) {
394 getFieldsFromClassHierarchy(Type
, Fields
);
398 bool clang::dataflow::containsSameFields(
399 const clang::dataflow::FieldSet
&Fields
,
400 const clang::dataflow::RecordStorageLocation::FieldToLoc
&FieldLocs
) {
401 if (Fields
.size() != FieldLocs
.size())
403 for ([[maybe_unused
]] auto [Field
, Loc
] : FieldLocs
)
404 if (!Fields
.contains(cast_or_null
<FieldDecl
>(Field
)))