1 //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Defines basic, non-domain-specific mechanisms for tracking tainted values.
11 //===----------------------------------------------------------------------===//
#include "clang/StaticAnalyzer/Checkers/Taint.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include <optional>

using namespace clang;
using namespace ento;
using namespace taint;
22 // Fully tainted symbols.
23 REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap
, SymbolRef
, TaintTagType
)
25 // Partially tainted symbols.
26 REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions
, const SubRegion
*,
28 REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint
, SymbolRef
, TaintedSubRegions
)
30 void taint::printTaint(ProgramStateRef State
, raw_ostream
&Out
, const char *NL
,
32 TaintMapTy TM
= State
->get
<TaintMap
>();
35 Out
<< "Tainted symbols:" << NL
;
37 for (const auto &I
: TM
)
38 Out
<< I
.first
<< " : " << I
.second
<< NL
;
41 void taint::dumpTaint(ProgramStateRef State
) {
42 printTaint(State
, llvm::errs());
45 ProgramStateRef
taint::addTaint(ProgramStateRef State
, const Stmt
*S
,
46 const LocationContext
*LCtx
,
48 return addTaint(State
, State
->getSVal(S
, LCtx
), Kind
);
51 ProgramStateRef
taint::addTaint(ProgramStateRef State
, SVal V
,
53 SymbolRef Sym
= V
.getAsSymbol();
55 return addTaint(State
, Sym
, Kind
);
57 // If the SVal represents a structure, try to mass-taint all values within the
58 // structure. For now it only works efficiently on lazy compound values that
59 // were conjured during a conservative evaluation of a function - either as
60 // return values of functions that return structures or arrays by value, or as
61 // values of structures or arrays passed into the function by reference,
62 // directly or through pointer aliasing. Such lazy compound values are
63 // characterized by having exactly one binding in their captured store within
64 // their parent region, which is a conjured symbol default-bound to the base
65 // region of the parent region.
66 if (auto LCV
= V
.getAs
<nonloc::LazyCompoundVal
>()) {
67 if (std::optional
<SVal
> binding
=
68 State
->getStateManager().getStoreManager().getDefaultBinding(
70 if (SymbolRef Sym
= binding
->getAsSymbol())
71 return addPartialTaint(State
, Sym
, LCV
->getRegion(), Kind
);
75 const MemRegion
*R
= V
.getAsRegion();
76 return addTaint(State
, R
, Kind
);
79 ProgramStateRef
taint::addTaint(ProgramStateRef State
, const MemRegion
*R
,
81 if (const SymbolicRegion
*SR
= dyn_cast_or_null
<SymbolicRegion
>(R
))
82 return addTaint(State
, SR
->getSymbol(), Kind
);
86 ProgramStateRef
taint::addTaint(ProgramStateRef State
, SymbolRef Sym
,
88 // If this is a symbol cast, remove the cast before adding the taint. Taint
90 while (const SymbolCast
*SC
= dyn_cast
<SymbolCast
>(Sym
))
91 Sym
= SC
->getOperand();
93 ProgramStateRef NewState
= State
->set
<TaintMap
>(Sym
, Kind
);
98 ProgramStateRef
taint::removeTaint(ProgramStateRef State
, SVal V
) {
99 SymbolRef Sym
= V
.getAsSymbol();
101 return removeTaint(State
, Sym
);
103 const MemRegion
*R
= V
.getAsRegion();
104 return removeTaint(State
, R
);
107 ProgramStateRef
taint::removeTaint(ProgramStateRef State
, const MemRegion
*R
) {
108 if (const SymbolicRegion
*SR
= dyn_cast_or_null
<SymbolicRegion
>(R
))
109 return removeTaint(State
, SR
->getSymbol());
113 ProgramStateRef
taint::removeTaint(ProgramStateRef State
, SymbolRef Sym
) {
114 // If this is a symbol cast, remove the cast before adding the taint. Taint
116 while (const SymbolCast
*SC
= dyn_cast
<SymbolCast
>(Sym
))
117 Sym
= SC
->getOperand();
119 ProgramStateRef NewState
= State
->remove
<TaintMap
>(Sym
);
124 ProgramStateRef
taint::addPartialTaint(ProgramStateRef State
,
126 const SubRegion
*SubRegion
,
128 // Ignore partial taint if the entire parent symbol is already tainted.
129 if (const TaintTagType
*T
= State
->get
<TaintMap
>(ParentSym
))
133 // Partial taint applies if only a portion of the symbol is tainted.
134 if (SubRegion
== SubRegion
->getBaseRegion())
135 return addTaint(State
, ParentSym
, Kind
);
137 const TaintedSubRegions
*SavedRegs
= State
->get
<DerivedSymTaint
>(ParentSym
);
138 TaintedSubRegions::Factory
&F
= State
->get_context
<TaintedSubRegions
>();
139 TaintedSubRegions Regs
= SavedRegs
? *SavedRegs
: F
.getEmptyMap();
141 Regs
= F
.add(Regs
, SubRegion
, Kind
);
142 ProgramStateRef NewState
= State
->set
<DerivedSymTaint
>(ParentSym
, Regs
);
147 bool taint::isTainted(ProgramStateRef State
, const Stmt
*S
,
148 const LocationContext
*LCtx
, TaintTagType Kind
) {
149 return !getTaintedSymbolsImpl(State
, S
, LCtx
, Kind
, /*ReturnFirstOnly=*/true)
153 bool taint::isTainted(ProgramStateRef State
, SVal V
, TaintTagType Kind
) {
154 return !getTaintedSymbolsImpl(State
, V
, Kind
, /*ReturnFirstOnly=*/true)
158 bool taint::isTainted(ProgramStateRef State
, const MemRegion
*Reg
,
160 return !getTaintedSymbolsImpl(State
, Reg
, K
, /*ReturnFirstOnly=*/true)
164 bool taint::isTainted(ProgramStateRef State
, SymbolRef Sym
, TaintTagType Kind
) {
165 return !getTaintedSymbolsImpl(State
, Sym
, Kind
, /*ReturnFirstOnly=*/true)
169 std::vector
<SymbolRef
> taint::getTaintedSymbols(ProgramStateRef State
,
171 const LocationContext
*LCtx
,
173 return getTaintedSymbolsImpl(State
, S
, LCtx
, Kind
, /*ReturnFirstOnly=*/false);
176 std::vector
<SymbolRef
> taint::getTaintedSymbols(ProgramStateRef State
, SVal V
,
178 return getTaintedSymbolsImpl(State
, V
, Kind
, /*ReturnFirstOnly=*/false);
181 std::vector
<SymbolRef
> taint::getTaintedSymbols(ProgramStateRef State
,
184 return getTaintedSymbolsImpl(State
, Sym
, Kind
, /*ReturnFirstOnly=*/false);
187 std::vector
<SymbolRef
> taint::getTaintedSymbols(ProgramStateRef State
,
188 const MemRegion
*Reg
,
190 return getTaintedSymbolsImpl(State
, Reg
, Kind
, /*ReturnFirstOnly=*/false);
193 std::vector
<SymbolRef
> taint::getTaintedSymbolsImpl(ProgramStateRef State
,
195 const LocationContext
*LCtx
,
197 bool returnFirstOnly
) {
198 SVal val
= State
->getSVal(S
, LCtx
);
199 return getTaintedSymbolsImpl(State
, val
, Kind
, returnFirstOnly
);
202 std::vector
<SymbolRef
> taint::getTaintedSymbolsImpl(ProgramStateRef State
,
203 SVal V
, TaintTagType Kind
,
204 bool returnFirstOnly
) {
205 if (SymbolRef Sym
= V
.getAsSymbol())
206 return getTaintedSymbolsImpl(State
, Sym
, Kind
, returnFirstOnly
);
207 if (const MemRegion
*Reg
= V
.getAsRegion())
208 return getTaintedSymbolsImpl(State
, Reg
, Kind
, returnFirstOnly
);
212 std::vector
<SymbolRef
> taint::getTaintedSymbolsImpl(ProgramStateRef State
,
213 const MemRegion
*Reg
,
215 bool returnFirstOnly
) {
216 std::vector
<SymbolRef
> TaintedSymbols
;
218 return TaintedSymbols
;
219 // Element region (array element) is tainted if either the base or the offset
221 if (const ElementRegion
*ER
= dyn_cast
<ElementRegion
>(Reg
)) {
222 std::vector
<SymbolRef
> TaintedIndex
=
223 getTaintedSymbolsImpl(State
, ER
->getIndex(), K
, returnFirstOnly
);
224 llvm::append_range(TaintedSymbols
, TaintedIndex
);
225 if (returnFirstOnly
&& !TaintedSymbols
.empty())
226 return TaintedSymbols
; // return early if needed
227 std::vector
<SymbolRef
> TaintedSuperRegion
=
228 getTaintedSymbolsImpl(State
, ER
->getSuperRegion(), K
, returnFirstOnly
);
229 llvm::append_range(TaintedSymbols
, TaintedSuperRegion
);
230 if (returnFirstOnly
&& !TaintedSymbols
.empty())
231 return TaintedSymbols
; // return early if needed
234 if (const SymbolicRegion
*SR
= dyn_cast
<SymbolicRegion
>(Reg
)) {
235 std::vector
<SymbolRef
> TaintedRegions
=
236 getTaintedSymbolsImpl(State
, SR
->getSymbol(), K
, returnFirstOnly
);
237 llvm::append_range(TaintedSymbols
, TaintedRegions
);
238 if (returnFirstOnly
&& !TaintedSymbols
.empty())
239 return TaintedSymbols
; // return early if needed
242 if (const SubRegion
*ER
= dyn_cast
<SubRegion
>(Reg
)) {
243 std::vector
<SymbolRef
> TaintedSubRegions
=
244 getTaintedSymbolsImpl(State
, ER
->getSuperRegion(), K
, returnFirstOnly
);
245 llvm::append_range(TaintedSymbols
, TaintedSubRegions
);
246 if (returnFirstOnly
&& !TaintedSymbols
.empty())
247 return TaintedSymbols
; // return early if needed
250 return TaintedSymbols
;
253 std::vector
<SymbolRef
> taint::getTaintedSymbolsImpl(ProgramStateRef State
,
256 bool returnFirstOnly
) {
257 std::vector
<SymbolRef
> TaintedSymbols
;
259 return TaintedSymbols
;
261 // Traverse all the symbols this symbol depends on to see if any are tainted.
262 for (SymbolRef SubSym
: Sym
->symbols()) {
263 if (!isa
<SymbolData
>(SubSym
))
266 if (const TaintTagType
*Tag
= State
->get
<TaintMap
>(SubSym
)) {
268 TaintedSymbols
.push_back(SubSym
);
270 return TaintedSymbols
; // return early if needed
274 if (const auto *SD
= dyn_cast
<SymbolDerived
>(SubSym
)) {
275 // If this is a SymbolDerived with a tainted parent, it's also tainted.
276 std::vector
<SymbolRef
> TaintedParents
= getTaintedSymbolsImpl(
277 State
, SD
->getParentSymbol(), Kind
, returnFirstOnly
);
278 llvm::append_range(TaintedSymbols
, TaintedParents
);
279 if (returnFirstOnly
&& !TaintedSymbols
.empty())
280 return TaintedSymbols
; // return early if needed
282 // If this is a SymbolDerived with the same parent symbol as another
283 // tainted SymbolDerived and a region that's a sub-region of that
284 // tainted symbol, it's also tainted.
285 if (const TaintedSubRegions
*Regs
=
286 State
->get
<DerivedSymTaint
>(SD
->getParentSymbol())) {
287 const TypedValueRegion
*R
= SD
->getRegion();
288 for (auto I
: *Regs
) {
289 // FIXME: The logic to identify tainted regions could be more
290 // complete. For example, this would not currently identify
291 // overlapping fields in a union as tainted. To identify this we can
292 // check for overlapping/nested byte offsets.
293 if (Kind
== I
.second
&& R
->isSubRegionOf(I
.first
)) {
294 TaintedSymbols
.push_back(SD
->getParentSymbol());
295 if (returnFirstOnly
&& !TaintedSymbols
.empty())
296 return TaintedSymbols
; // return early if needed
302 // If memory region is tainted, data is also tainted.
303 if (const auto *SRV
= dyn_cast
<SymbolRegionValue
>(SubSym
)) {
304 std::vector
<SymbolRef
> TaintedRegions
=
305 getTaintedSymbolsImpl(State
, SRV
->getRegion(), Kind
, returnFirstOnly
);
306 llvm::append_range(TaintedSymbols
, TaintedRegions
);
307 if (returnFirstOnly
&& !TaintedSymbols
.empty())
308 return TaintedSymbols
; // return early if needed
311 // If this is a SymbolCast from a tainted value, it's also tainted.
312 if (const auto *SC
= dyn_cast
<SymbolCast
>(SubSym
)) {
313 std::vector
<SymbolRef
> TaintedCasts
=
314 getTaintedSymbolsImpl(State
, SC
->getOperand(), Kind
, returnFirstOnly
);
315 llvm::append_range(TaintedSymbols
, TaintedCasts
);
316 if (returnFirstOnly
&& !TaintedSymbols
.empty())
317 return TaintedSymbols
; // return early if needed
320 return TaintedSymbols
;