1 //===- FuzzerDataFlowTrace.cpp - DataFlowTrace ---*- C++ -* ===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // fuzzer::DataFlowTrace
9 //===----------------------------------------------------------------------===//
11 #include "FuzzerDataFlowTrace.h"
13 #include "FuzzerCommand.h"
15 #include "FuzzerRandom.h"
16 #include "FuzzerSHA1.h"
17 #include "FuzzerUtil.h"
25 #include <unordered_map>
26 #include <unordered_set>
30 static const char *kFunctionsTxt
= "functions.txt";
32 bool BlockCoverage::AppendCoverage(const std::string
&S
) {
33 std::stringstream
SS(S
);
34 return AppendCoverage(SS
);
37 // Coverage lines have this form:
39 // where N is the number of the function, T is the total number of instrumented
40 // BBs, and X,Y,Z, if present, are the indecies of covered BB.
41 // BB #0, which is the entry block, is not explicitly listed.
42 bool BlockCoverage::AppendCoverage(std::istream
&IN
) {
44 while (std::getline(IN
, L
, '\n')) {
47 std::stringstream
SS(L
.c_str() + 1);
48 size_t FunctionId
= 0;
51 FunctionsWithDFT
.insert(FunctionId
);
54 if (L
[0] != 'C') continue;
55 Vector
<uint32_t> CoveredBlocks
;
60 CoveredBlocks
.push_back(BB
);
62 if (CoveredBlocks
.empty()) return false;
63 uint32_t NumBlocks
= CoveredBlocks
.back();
64 CoveredBlocks
.pop_back();
65 for (auto BB
: CoveredBlocks
)
66 if (BB
>= NumBlocks
) return false;
67 auto It
= Functions
.find(FunctionId
);
70 ? Functions
.insert({FunctionId
, Vector
<uint32_t>(NumBlocks
)})
74 if (Counters
.size() != NumBlocks
) return false; // wrong number of blocks.
77 for (auto BB
: CoveredBlocks
)
83 // Assign weights to each function.
84 // General principles:
85 // * any uncovered function gets weight 0.
86 // * a function with lots of uncovered blocks gets bigger weight.
87 // * a function with a less frequently executed code gets bigger weight.
88 Vector
<double> BlockCoverage::FunctionWeights(size_t NumFunctions
) const {
89 Vector
<double> Res(NumFunctions
);
90 for (auto It
: Functions
) {
91 auto FunctionID
= It
.first
;
92 auto Counters
= It
.second
;
93 assert(FunctionID
< NumFunctions
);
94 auto &Weight
= Res
[FunctionID
];
95 // Give higher weight if the function has a DFT.
96 Weight
= FunctionsWithDFT
.count(FunctionID
) ? 1000. : 1;
97 // Give higher weight to functions with less frequently seen basic blocks.
98 Weight
/= SmallestNonZeroCounter(Counters
);
99 // Give higher weight to functions with the most uncovered basic blocks.
100 Weight
*= NumberOfUncoveredBlocks(Counters
) + 1;
105 int DataFlowTrace::ReadCoverage(const std::string
&DirPath
) {
106 Vector
<SizedFile
> Files
;
107 int Res
= GetSizedFilesFromDir(DirPath
, &Files
);
110 for (auto &SF
: Files
) {
111 auto Name
= Basename(SF
.File
);
112 if (Name
== kFunctionsTxt
) continue;
113 if (!CorporaHashes
.count(Name
)) continue;
114 std::ifstream
IF(SF
.File
);
115 Coverage
.AppendCoverage(IF
);
120 static void DFTStringAppendToVector(Vector
<uint8_t> *DFT
,
121 const std::string
&DFTString
) {
122 assert(DFT
->size() == DFTString
.size());
123 for (size_t I
= 0, Len
= DFT
->size(); I
< Len
; I
++)
124 (*DFT
)[I
] = DFTString
[I
] == '1';
127 // converts a string of '0' and '1' into a Vector<uint8_t>
128 static Vector
<uint8_t> DFTStringToVector(const std::string
&DFTString
) {
129 Vector
<uint8_t> DFT(DFTString
.size());
130 DFTStringAppendToVector(&DFT
, DFTString
);
134 static bool ParseError(const char *Err
, const std::string
&Line
) {
135 Printf("DataFlowTrace: parse error: %s: Line: %s\n", Err
, Line
.c_str());
139 // TODO(metzman): replace std::string with std::string_view for
140 // better performance. Need to figure our how to use string_view on Windows.
141 static bool ParseDFTLine(const std::string
&Line
, size_t *FunctionNum
,
142 std::string
*DFTString
) {
143 if (!Line
.empty() && Line
[0] != 'F')
144 return false; // Ignore coverage.
145 size_t SpacePos
= Line
.find(' ');
146 if (SpacePos
== std::string::npos
)
147 return ParseError("no space in the trace line", Line
);
148 if (Line
.empty() || Line
[0] != 'F')
149 return ParseError("the trace line doesn't start with 'F'", Line
);
150 *FunctionNum
= std::atol(Line
.c_str() + 1);
151 const char *Beg
= Line
.c_str() + SpacePos
+ 1;
152 const char *End
= Line
.c_str() + Line
.size();
154 size_t Len
= End
- Beg
;
155 for (size_t I
= 0; I
< Len
; I
++) {
156 if (Beg
[I
] != '0' && Beg
[I
] != '1')
157 return ParseError("the trace should contain only 0 or 1", Line
);
163 int DataFlowTrace::Init(const std::string
&DirPath
, std::string
*FocusFunction
,
164 Vector
<SizedFile
> &CorporaFiles
, Random
&Rand
) {
165 if (DirPath
.empty()) return 0;
166 Printf("INFO: DataFlowTrace: reading from '%s'\n", DirPath
.c_str());
167 Vector
<SizedFile
> Files
;
168 int Res
= GetSizedFilesFromDir(DirPath
, &Files
);
172 size_t FocusFuncIdx
= SIZE_MAX
;
173 Vector
<std::string
> FunctionNames
;
175 // Collect the hashes of the corpus files.
176 for (auto &SF
: CorporaFiles
)
177 CorporaHashes
.insert(Hash(FileToVector(SF
.File
)));
179 // Read functions.txt
180 std::ifstream
IF(DirPlusFile(DirPath
, kFunctionsTxt
));
181 size_t NumFunctions
= 0;
182 while (std::getline(IF
, L
, '\n')) {
183 FunctionNames
.push_back(L
);
185 if (*FocusFunction
== L
)
186 FocusFuncIdx
= NumFunctions
- 1;
191 if (*FocusFunction
== "auto") {
192 // AUTOFOCUS works like this:
193 // * reads the coverage data from the DFT files.
194 // * assigns weights to functions based on coverage.
195 // * chooses a random function according to the weights.
196 Res
= ReadCoverage(DirPath
);
199 auto Weights
= Coverage
.FunctionWeights(NumFunctions
);
200 Vector
<double> Intervals(NumFunctions
+ 1);
201 std::iota(Intervals
.begin(), Intervals
.end(), 0);
202 auto Distribution
= std::piecewise_constant_distribution
<double>(
203 Intervals
.begin(), Intervals
.end(), Weights
.begin());
204 FocusFuncIdx
= static_cast<size_t>(Distribution(Rand
));
205 *FocusFunction
= FunctionNames
[FocusFuncIdx
];
206 assert(FocusFuncIdx
< NumFunctions
);
207 Printf("INFO: AUTOFOCUS: %zd %s\n", FocusFuncIdx
,
208 FunctionNames
[FocusFuncIdx
].c_str());
209 for (size_t i
= 0; i
< NumFunctions
; i
++) {
210 if (!Weights
[i
]) continue;
211 Printf(" [%zd] W %g\tBB-tot %u\tBB-cov %u\tEntryFreq %u:\t%s\n", i
,
212 Weights
[i
], Coverage
.GetNumberOfBlocks(i
),
213 Coverage
.GetNumberOfCoveredBlocks(i
), Coverage
.GetCounter(i
, 0),
214 FunctionNames
[i
].c_str());
218 if (!NumFunctions
|| FocusFuncIdx
== SIZE_MAX
|| Files
.size() <= 1)
222 size_t NumTraceFiles
= 0;
223 size_t NumTracesWithFocusFunction
= 0;
224 for (auto &SF
: Files
) {
225 auto Name
= Basename(SF
.File
);
226 if (Name
== kFunctionsTxt
) continue;
227 if (!CorporaHashes
.count(Name
)) continue; // not in the corpus.
229 // Printf("=== %s\n", Name.c_str());
230 std::ifstream
IF(SF
.File
);
231 while (std::getline(IF
, L
, '\n')) {
232 size_t FunctionNum
= 0;
233 std::string DFTString
;
234 if (ParseDFTLine(L
, &FunctionNum
, &DFTString
) &&
235 FunctionNum
== FocusFuncIdx
) {
236 NumTracesWithFocusFunction
++;
238 if (FunctionNum
>= NumFunctions
) {
239 ParseError("N is greater than the number of functions", L
);
242 Traces
[Name
] = DFTStringToVector(DFTString
);
243 // Print just a few small traces.
244 if (NumTracesWithFocusFunction
<= 3 && DFTString
.size() <= 16)
245 Printf("%s => |%s|\n", Name
.c_str(), std::string(DFTString
).c_str());
246 break; // No need to parse the following lines.
250 Printf("INFO: DataFlowTrace: %zd trace files, %zd functions, "
251 "%zd traces with focus function\n",
252 NumTraceFiles
, NumFunctions
, NumTracesWithFocusFunction
);
256 int CollectDataFlow(const std::string
&DFTBinary
, const std::string
&DirPath
,
257 const Vector
<SizedFile
> &CorporaFiles
) {
258 Printf("INFO: collecting data flow: bin: %s dir: %s files: %zd\n",
259 DFTBinary
.c_str(), DirPath
.c_str(), CorporaFiles
.size());
260 if (CorporaFiles
.empty()) {
261 Printf("ERROR: can't collect data flow without corpus provided.");
265 static char DFSanEnv
[] = "DFSAN_OPTIONS=warn_unimplemented=0";
268 for (auto &F
: CorporaFiles
) {
269 // For every input F we need to collect the data flow and the coverage.
270 // Data flow collection may fail if we request too many DFSan tags at once.
271 // So, we start from requesting all tags in range [0,Size) and if that fails
272 // we then request tags in [0,Size/2) and [Size/2, Size), and so on.
273 // Function number => DFT.
274 auto OutPath
= DirPlusFile(DirPath
, Hash(FileToVector(F
.File
)));
275 std::unordered_map
<size_t, Vector
<uint8_t>> DFTMap
;
276 std::unordered_set
<std::string
> Cov
;
278 Cmd
.addArgument(DFTBinary
);
279 Cmd
.addArgument(F
.File
);
280 Cmd
.addArgument(OutPath
);
281 Printf("CMD: %s\n", Cmd
.toString().c_str());
284 // Write functions.txt if it's currently empty or doesn't exist.
285 auto FunctionsTxtPath
= DirPlusFile(DirPath
, kFunctionsTxt
);
286 if (FileToString(FunctionsTxtPath
).empty()) {
288 Cmd
.addArgument(DFTBinary
);
289 Cmd
.setOutputFile(FunctionsTxtPath
);
295 } // namespace fuzzer