1 //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Defines basic, non-domain-specific mechanisms for tracking tainted values.
11 //===----------------------------------------------------------------------===//
13 #include "clang/StaticAnalyzer/Checkers/Taint.h"
14 #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
15 #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
16 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
19 using namespace clang
;
21 using namespace taint
;
// Fully tainted symbols: maps a symbol to the taint tag that applies to the
// symbol as a whole.
REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType)

// Partially tainted symbols: TaintedSubRegions maps a sub-region to its taint
// tag, and DerivedSymTaint associates a parent symbol with the set of its
// sub-regions that carry taint.
REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *,
                                       TaintTagType)
REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
31 void taint::printTaint(ProgramStateRef State
, raw_ostream
&Out
, const char *NL
,
33 TaintMapTy TM
= State
->get
<TaintMap
>();
36 Out
<< "Tainted symbols:" << NL
;
38 for (const auto &I
: TM
)
39 Out
<< I
.first
<< " : " << I
.second
<< NL
;
42 void taint::dumpTaint(ProgramStateRef State
) {
43 printTaint(State
, llvm::errs());
46 ProgramStateRef
taint::addTaint(ProgramStateRef State
, const Stmt
*S
,
47 const LocationContext
*LCtx
,
49 return addTaint(State
, State
->getSVal(S
, LCtx
), Kind
);
52 ProgramStateRef
taint::addTaint(ProgramStateRef State
, SVal V
,
54 SymbolRef Sym
= V
.getAsSymbol();
56 return addTaint(State
, Sym
, Kind
);
58 // If the SVal represents a structure, try to mass-taint all values within the
59 // structure. For now it only works efficiently on lazy compound values that
60 // were conjured during a conservative evaluation of a function - either as
61 // return values of functions that return structures or arrays by value, or as
62 // values of structures or arrays passed into the function by reference,
63 // directly or through pointer aliasing. Such lazy compound values are
64 // characterized by having exactly one binding in their captured store within
65 // their parent region, which is a conjured symbol default-bound to the base
66 // region of the parent region.
67 if (auto LCV
= V
.getAs
<nonloc::LazyCompoundVal
>()) {
68 if (std::optional
<SVal
> binding
=
69 State
->getStateManager().getStoreManager().getDefaultBinding(
71 if (SymbolRef Sym
= binding
->getAsSymbol())
72 return addPartialTaint(State
, Sym
, LCV
->getRegion(), Kind
);
76 const MemRegion
*R
= V
.getAsRegion();
77 return addTaint(State
, R
, Kind
);
80 ProgramStateRef
taint::addTaint(ProgramStateRef State
, const MemRegion
*R
,
82 if (const SymbolicRegion
*SR
= dyn_cast_or_null
<SymbolicRegion
>(R
))
83 return addTaint(State
, SR
->getSymbol(), Kind
);
87 ProgramStateRef
taint::addTaint(ProgramStateRef State
, SymbolRef Sym
,
89 // If this is a symbol cast, remove the cast before adding the taint. Taint
91 while (const SymbolCast
*SC
= dyn_cast
<SymbolCast
>(Sym
))
92 Sym
= SC
->getOperand();
94 ProgramStateRef NewState
= State
->set
<TaintMap
>(Sym
, Kind
);
99 ProgramStateRef
taint::removeTaint(ProgramStateRef State
, SVal V
) {
100 SymbolRef Sym
= V
.getAsSymbol();
102 return removeTaint(State
, Sym
);
104 const MemRegion
*R
= V
.getAsRegion();
105 return removeTaint(State
, R
);
108 ProgramStateRef
taint::removeTaint(ProgramStateRef State
, const MemRegion
*R
) {
109 if (const SymbolicRegion
*SR
= dyn_cast_or_null
<SymbolicRegion
>(R
))
110 return removeTaint(State
, SR
->getSymbol());
114 ProgramStateRef
taint::removeTaint(ProgramStateRef State
, SymbolRef Sym
) {
115 // If this is a symbol cast, remove the cast before adding the taint. Taint
117 while (const SymbolCast
*SC
= dyn_cast
<SymbolCast
>(Sym
))
118 Sym
= SC
->getOperand();
120 ProgramStateRef NewState
= State
->remove
<TaintMap
>(Sym
);
125 ProgramStateRef
taint::addPartialTaint(ProgramStateRef State
,
127 const SubRegion
*SubRegion
,
129 // Ignore partial taint if the entire parent symbol is already tainted.
130 if (const TaintTagType
*T
= State
->get
<TaintMap
>(ParentSym
))
134 // Partial taint applies if only a portion of the symbol is tainted.
135 if (SubRegion
== SubRegion
->getBaseRegion())
136 return addTaint(State
, ParentSym
, Kind
);
138 const TaintedSubRegions
*SavedRegs
= State
->get
<DerivedSymTaint
>(ParentSym
);
139 TaintedSubRegions::Factory
&F
= State
->get_context
<TaintedSubRegions
>();
140 TaintedSubRegions Regs
= SavedRegs
? *SavedRegs
: F
.getEmptyMap();
142 Regs
= F
.add(Regs
, SubRegion
, Kind
);
143 ProgramStateRef NewState
= State
->set
<DerivedSymTaint
>(ParentSym
, Regs
);
148 bool taint::isTainted(ProgramStateRef State
, const Stmt
*S
,
149 const LocationContext
*LCtx
, TaintTagType Kind
) {
150 return !getTaintedSymbolsImpl(State
, S
, LCtx
, Kind
, /*ReturnFirstOnly=*/true)
154 bool taint::isTainted(ProgramStateRef State
, SVal V
, TaintTagType Kind
) {
155 return !getTaintedSymbolsImpl(State
, V
, Kind
, /*ReturnFirstOnly=*/true)
159 bool taint::isTainted(ProgramStateRef State
, const MemRegion
*Reg
,
161 return !getTaintedSymbolsImpl(State
, Reg
, K
, /*ReturnFirstOnly=*/true)
165 bool taint::isTainted(ProgramStateRef State
, SymbolRef Sym
, TaintTagType Kind
) {
166 return !getTaintedSymbolsImpl(State
, Sym
, Kind
, /*ReturnFirstOnly=*/true)
170 std::vector
<SymbolRef
> taint::getTaintedSymbols(ProgramStateRef State
,
172 const LocationContext
*LCtx
,
174 return getTaintedSymbolsImpl(State
, S
, LCtx
, Kind
, /*ReturnFirstOnly=*/false);
177 std::vector
<SymbolRef
> taint::getTaintedSymbols(ProgramStateRef State
, SVal V
,
179 return getTaintedSymbolsImpl(State
, V
, Kind
, /*ReturnFirstOnly=*/false);
182 std::vector
<SymbolRef
> taint::getTaintedSymbols(ProgramStateRef State
,
185 return getTaintedSymbolsImpl(State
, Sym
, Kind
, /*ReturnFirstOnly=*/false);
188 std::vector
<SymbolRef
> taint::getTaintedSymbols(ProgramStateRef State
,
189 const MemRegion
*Reg
,
191 return getTaintedSymbolsImpl(State
, Reg
, Kind
, /*ReturnFirstOnly=*/false);
194 std::vector
<SymbolRef
> taint::getTaintedSymbolsImpl(ProgramStateRef State
,
196 const LocationContext
*LCtx
,
198 bool returnFirstOnly
) {
199 SVal val
= State
->getSVal(S
, LCtx
);
200 return getTaintedSymbolsImpl(State
, val
, Kind
, returnFirstOnly
);
203 std::vector
<SymbolRef
> taint::getTaintedSymbolsImpl(ProgramStateRef State
,
204 SVal V
, TaintTagType Kind
,
205 bool returnFirstOnly
) {
206 if (SymbolRef Sym
= V
.getAsSymbol())
207 return getTaintedSymbolsImpl(State
, Sym
, Kind
, returnFirstOnly
);
208 if (const MemRegion
*Reg
= V
.getAsRegion())
209 return getTaintedSymbolsImpl(State
, Reg
, Kind
, returnFirstOnly
);
211 if (auto LCV
= V
.getAs
<nonloc::LazyCompoundVal
>()) {
212 StoreManager
&StoreMgr
= State
->getStateManager().getStoreManager();
213 if (auto DefaultVal
= StoreMgr
.getDefaultBinding(*LCV
)) {
214 return getTaintedSymbolsImpl(State
, *DefaultVal
, Kind
, returnFirstOnly
);
221 std::vector
<SymbolRef
> taint::getTaintedSymbolsImpl(ProgramStateRef State
,
222 const MemRegion
*Reg
,
224 bool returnFirstOnly
) {
225 std::vector
<SymbolRef
> TaintedSymbols
;
227 return TaintedSymbols
;
229 // Element region (array element) is tainted if the offset is tainted.
230 if (const ElementRegion
*ER
= dyn_cast
<ElementRegion
>(Reg
)) {
231 std::vector
<SymbolRef
> TaintedIndex
=
232 getTaintedSymbolsImpl(State
, ER
->getIndex(), K
, returnFirstOnly
);
233 llvm::append_range(TaintedSymbols
, TaintedIndex
);
234 if (returnFirstOnly
&& !TaintedSymbols
.empty())
235 return TaintedSymbols
; // return early if needed
238 // Symbolic region is tainted if the corresponding symbol is tainted.
239 if (const SymbolicRegion
*SR
= dyn_cast
<SymbolicRegion
>(Reg
)) {
240 std::vector
<SymbolRef
> TaintedRegions
=
241 getTaintedSymbolsImpl(State
, SR
->getSymbol(), K
, returnFirstOnly
);
242 llvm::append_range(TaintedSymbols
, TaintedRegions
);
243 if (returnFirstOnly
&& !TaintedSymbols
.empty())
244 return TaintedSymbols
; // return early if needed
247 // Any subregion (including Element and Symbolic regions) is tainted if its
248 // super-region is tainted.
249 if (const SubRegion
*ER
= dyn_cast
<SubRegion
>(Reg
)) {
250 std::vector
<SymbolRef
> TaintedSubRegions
=
251 getTaintedSymbolsImpl(State
, ER
->getSuperRegion(), K
, returnFirstOnly
);
252 llvm::append_range(TaintedSymbols
, TaintedSubRegions
);
253 if (returnFirstOnly
&& !TaintedSymbols
.empty())
254 return TaintedSymbols
; // return early if needed
257 return TaintedSymbols
;
260 std::vector
<SymbolRef
> taint::getTaintedSymbolsImpl(ProgramStateRef State
,
263 bool returnFirstOnly
) {
264 std::vector
<SymbolRef
> TaintedSymbols
;
266 return TaintedSymbols
;
268 // HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
269 if (const auto &Opts
= State
->getAnalysisManager().getAnalyzerOptions();
270 Sym
->computeComplexity() > Opts
.MaxTaintedSymbolComplexity
) {
274 // Traverse all the symbols this symbol depends on to see if any are tainted.
275 for (SymbolRef SubSym
: Sym
->symbols()) {
276 if (!isa
<SymbolData
>(SubSym
))
279 if (const TaintTagType
*Tag
= State
->get
<TaintMap
>(SubSym
)) {
281 TaintedSymbols
.push_back(SubSym
);
283 return TaintedSymbols
; // return early if needed
287 if (const auto *SD
= dyn_cast
<SymbolDerived
>(SubSym
)) {
288 // If this is a SymbolDerived with a tainted parent, it's also tainted.
289 std::vector
<SymbolRef
> TaintedParents
= getTaintedSymbolsImpl(
290 State
, SD
->getParentSymbol(), Kind
, returnFirstOnly
);
291 llvm::append_range(TaintedSymbols
, TaintedParents
);
292 if (returnFirstOnly
&& !TaintedSymbols
.empty())
293 return TaintedSymbols
; // return early if needed
295 // If this is a SymbolDerived with the same parent symbol as another
296 // tainted SymbolDerived and a region that's a sub-region of that
297 // tainted symbol, it's also tainted.
298 if (const TaintedSubRegions
*Regs
=
299 State
->get
<DerivedSymTaint
>(SD
->getParentSymbol())) {
300 const TypedValueRegion
*R
= SD
->getRegion();
301 for (auto I
: *Regs
) {
302 // FIXME: The logic to identify tainted regions could be more
303 // complete. For example, this would not currently identify
304 // overlapping fields in a union as tainted. To identify this we can
305 // check for overlapping/nested byte offsets.
306 if (Kind
== I
.second
&& R
->isSubRegionOf(I
.first
)) {
307 TaintedSymbols
.push_back(SD
->getParentSymbol());
308 if (returnFirstOnly
&& !TaintedSymbols
.empty())
309 return TaintedSymbols
; // return early if needed
315 // If memory region is tainted, data is also tainted.
316 if (const auto *SRV
= dyn_cast
<SymbolRegionValue
>(SubSym
)) {
317 std::vector
<SymbolRef
> TaintedRegions
=
318 getTaintedSymbolsImpl(State
, SRV
->getRegion(), Kind
, returnFirstOnly
);
319 llvm::append_range(TaintedSymbols
, TaintedRegions
);
320 if (returnFirstOnly
&& !TaintedSymbols
.empty())
321 return TaintedSymbols
; // return early if needed
324 // If this is a SymbolCast from a tainted value, it's also tainted.
325 if (const auto *SC
= dyn_cast
<SymbolCast
>(SubSym
)) {
326 std::vector
<SymbolRef
> TaintedCasts
=
327 getTaintedSymbolsImpl(State
, SC
->getOperand(), Kind
, returnFirstOnly
);
328 llvm::append_range(TaintedSymbols
, TaintedCasts
);
329 if (returnFirstOnly
&& !TaintedSymbols
.empty())
330 return TaintedSymbols
; // return early if needed
333 return TaintedSymbols
;