1 //===- FuzzerDataFlowTrace.cpp - DataFlowTrace ---*- C++ -* ===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // fuzzer::DataFlowTrace
9 //===----------------------------------------------------------------------===//
11 #include "FuzzerDataFlowTrace.h"
13 #include "FuzzerCommand.h"
15 #include "FuzzerRandom.h"
16 #include "FuzzerSHA1.h"
17 #include "FuzzerUtil.h"
25 #include <unordered_map>
26 #include <unordered_set>
30 static const char *kFunctionsTxt
= "functions.txt";
32 bool BlockCoverage::AppendCoverage(const std::string
&S
) {
33 std::stringstream
SS(S
);
34 return AppendCoverage(SS
);
37 // Coverage lines have this form:
39 // where N is the number of the function, T is the total number of instrumented
40 // BBs, and X,Y,Z, if present, are the indices of covered BB.
41 // BB #0, which is the entry block, is not explicitly listed.
42 bool BlockCoverage::AppendCoverage(std::istream
&IN
) {
44 while (std::getline(IN
, L
, '\n')) {
47 std::stringstream
SS(L
.c_str() + 1);
48 size_t FunctionId
= 0;
51 FunctionsWithDFT
.insert(FunctionId
);
54 if (L
[0] != 'C') continue;
55 std::vector
<uint32_t> CoveredBlocks
;
60 CoveredBlocks
.push_back(BB
);
62 if (CoveredBlocks
.empty()) return false;
63 // Ensures no CoverageVector is longer than UINT32_MAX.
64 uint32_t NumBlocks
= CoveredBlocks
.back();
65 CoveredBlocks
.pop_back();
66 for (auto BB
: CoveredBlocks
)
67 if (BB
>= NumBlocks
) return false;
68 auto It
= Functions
.find(FunctionId
);
71 ? Functions
.insert({FunctionId
, std::vector
<uint32_t>(NumBlocks
)})
75 if (Counters
.size() != NumBlocks
) return false; // wrong number of blocks.
78 for (auto BB
: CoveredBlocks
)
84 // Assign weights to each function.
85 // General principles:
86 // * any uncovered function gets weight 0.
87 // * a function with lots of uncovered blocks gets bigger weight.
88 // * a function with a less frequently executed code gets bigger weight.
89 std::vector
<double> BlockCoverage::FunctionWeights(size_t NumFunctions
) const {
90 std::vector
<double> Res(NumFunctions
);
91 for (const auto &It
: Functions
) {
92 auto FunctionID
= It
.first
;
93 auto Counters
= It
.second
;
94 assert(FunctionID
< NumFunctions
);
95 auto &Weight
= Res
[FunctionID
];
96 // Give higher weight if the function has a DFT.
97 Weight
= FunctionsWithDFT
.count(FunctionID
) ? 1000. : 1;
98 // Give higher weight to functions with less frequently seen basic blocks.
99 Weight
/= SmallestNonZeroCounter(Counters
);
100 // Give higher weight to functions with the most uncovered basic blocks.
101 Weight
*= NumberOfUncoveredBlocks(Counters
) + 1;
106 void DataFlowTrace::ReadCoverage(const std::string
&DirPath
) {
107 std::vector
<SizedFile
> Files
;
108 GetSizedFilesFromDir(DirPath
, &Files
);
109 for (auto &SF
: Files
) {
110 auto Name
= Basename(SF
.File
);
111 if (Name
== kFunctionsTxt
) continue;
112 if (!CorporaHashes
.count(Name
)) continue;
113 std::ifstream
IF(SF
.File
);
114 Coverage
.AppendCoverage(IF
);
118 static void DFTStringAppendToVector(std::vector
<uint8_t> *DFT
,
119 const std::string
&DFTString
) {
120 assert(DFT
->size() == DFTString
.size());
121 for (size_t I
= 0, Len
= DFT
->size(); I
< Len
; I
++)
122 (*DFT
)[I
] = DFTString
[I
] == '1';
125 // converts a string of '0' and '1' into a std::vector<uint8_t>
126 static std::vector
<uint8_t> DFTStringToVector(const std::string
&DFTString
) {
127 std::vector
<uint8_t> DFT(DFTString
.size());
128 DFTStringAppendToVector(&DFT
, DFTString
);
132 static bool ParseError(const char *Err
, const std::string
&Line
) {
133 Printf("DataFlowTrace: parse error: %s: Line: %s\n", Err
, Line
.c_str());
137 // TODO(metzman): replace std::string with std::string_view for
138 // better performance. Need to figure our how to use string_view on Windows.
139 static bool ParseDFTLine(const std::string
&Line
, size_t *FunctionNum
,
140 std::string
*DFTString
) {
141 if (!Line
.empty() && Line
[0] != 'F')
142 return false; // Ignore coverage.
143 size_t SpacePos
= Line
.find(' ');
144 if (SpacePos
== std::string::npos
)
145 return ParseError("no space in the trace line", Line
);
146 if (Line
.empty() || Line
[0] != 'F')
147 return ParseError("the trace line doesn't start with 'F'", Line
);
148 *FunctionNum
= std::atol(Line
.c_str() + 1);
149 const char *Beg
= Line
.c_str() + SpacePos
+ 1;
150 const char *End
= Line
.c_str() + Line
.size();
152 size_t Len
= End
- Beg
;
153 for (size_t I
= 0; I
< Len
; I
++) {
154 if (Beg
[I
] != '0' && Beg
[I
] != '1')
155 return ParseError("the trace should contain only 0 or 1", Line
);
161 bool DataFlowTrace::Init(const std::string
&DirPath
, std::string
*FocusFunction
,
162 std::vector
<SizedFile
> &CorporaFiles
, Random
&Rand
) {
163 if (DirPath
.empty()) return false;
164 Printf("INFO: DataFlowTrace: reading from '%s'\n", DirPath
.c_str());
165 std::vector
<SizedFile
> Files
;
166 GetSizedFilesFromDir(DirPath
, &Files
);
168 size_t FocusFuncIdx
= SIZE_MAX
;
169 std::vector
<std::string
> FunctionNames
;
171 // Collect the hashes of the corpus files.
172 for (auto &SF
: CorporaFiles
)
173 CorporaHashes
.insert(Hash(FileToVector(SF
.File
)));
175 // Read functions.txt
176 std::ifstream
IF(DirPlusFile(DirPath
, kFunctionsTxt
));
177 size_t NumFunctions
= 0;
178 while (std::getline(IF
, L
, '\n')) {
179 FunctionNames
.push_back(L
);
181 if (*FocusFunction
== L
)
182 FocusFuncIdx
= NumFunctions
- 1;
187 if (*FocusFunction
== "auto") {
188 // AUTOFOCUS works like this:
189 // * reads the coverage data from the DFT files.
190 // * assigns weights to functions based on coverage.
191 // * chooses a random function according to the weights.
192 ReadCoverage(DirPath
);
193 auto Weights
= Coverage
.FunctionWeights(NumFunctions
);
194 std::vector
<double> Intervals(NumFunctions
+ 1);
195 std::iota(Intervals
.begin(), Intervals
.end(), 0);
196 auto Distribution
= std::piecewise_constant_distribution
<double>(
197 Intervals
.begin(), Intervals
.end(), Weights
.begin());
198 FocusFuncIdx
= static_cast<size_t>(Distribution(Rand
));
199 *FocusFunction
= FunctionNames
[FocusFuncIdx
];
200 assert(FocusFuncIdx
< NumFunctions
);
201 Printf("INFO: AUTOFOCUS: %zd %s\n", FocusFuncIdx
,
202 FunctionNames
[FocusFuncIdx
].c_str());
203 for (size_t i
= 0; i
< NumFunctions
; i
++) {
204 if (Weights
[i
] == 0.0)
206 Printf(" [%zd] W %g\tBB-tot %u\tBB-cov %u\tEntryFreq %u:\t%s\n", i
,
207 Weights
[i
], Coverage
.GetNumberOfBlocks(i
),
208 Coverage
.GetNumberOfCoveredBlocks(i
), Coverage
.GetCounter(i
, 0),
209 FunctionNames
[i
].c_str());
213 if (!NumFunctions
|| FocusFuncIdx
== SIZE_MAX
|| Files
.size() <= 1)
217 size_t NumTraceFiles
= 0;
218 size_t NumTracesWithFocusFunction
= 0;
219 for (auto &SF
: Files
) {
220 auto Name
= Basename(SF
.File
);
221 if (Name
== kFunctionsTxt
) continue;
222 if (!CorporaHashes
.count(Name
)) continue; // not in the corpus.
224 // Printf("=== %s\n", Name.c_str());
225 std::ifstream
IF(SF
.File
);
226 while (std::getline(IF
, L
, '\n')) {
227 size_t FunctionNum
= 0;
228 std::string DFTString
;
229 if (ParseDFTLine(L
, &FunctionNum
, &DFTString
) &&
230 FunctionNum
== FocusFuncIdx
) {
231 NumTracesWithFocusFunction
++;
233 if (FunctionNum
>= NumFunctions
)
234 return ParseError("N is greater than the number of functions", L
);
235 Traces
[Name
] = DFTStringToVector(DFTString
);
236 // Print just a few small traces.
237 if (NumTracesWithFocusFunction
<= 3 && DFTString
.size() <= 16)
238 Printf("%s => |%s|\n", Name
.c_str(), std::string(DFTString
).c_str());
239 break; // No need to parse the following lines.
243 Printf("INFO: DataFlowTrace: %zd trace files, %zd functions, "
244 "%zd traces with focus function\n",
245 NumTraceFiles
, NumFunctions
, NumTracesWithFocusFunction
);
246 return NumTraceFiles
> 0;
249 int CollectDataFlow(const std::string
&DFTBinary
, const std::string
&DirPath
,
250 const std::vector
<SizedFile
> &CorporaFiles
) {
251 Printf("INFO: collecting data flow: bin: %s dir: %s files: %zd\n",
252 DFTBinary
.c_str(), DirPath
.c_str(), CorporaFiles
.size());
253 if (CorporaFiles
.empty()) {
254 Printf("ERROR: can't collect data flow without corpus provided.");
258 static char DFSanEnv
[] = "DFSAN_OPTIONS=warn_unimplemented=0";
261 for (auto &F
: CorporaFiles
) {
262 // For every input F we need to collect the data flow and the coverage.
263 // Data flow collection may fail if we request too many DFSan tags at once.
264 // So, we start from requesting all tags in range [0,Size) and if that fails
265 // we then request tags in [0,Size/2) and [Size/2, Size), and so on.
266 // Function number => DFT.
267 auto OutPath
= DirPlusFile(DirPath
, Hash(FileToVector(F
.File
)));
268 std::unordered_map
<size_t, std::vector
<uint8_t>> DFTMap
;
269 std::unordered_set
<std::string
> Cov
;
271 Cmd
.addArgument(DFTBinary
);
272 Cmd
.addArgument(F
.File
);
273 Cmd
.addArgument(OutPath
);
274 Printf("CMD: %s\n", Cmd
.toString().c_str());
277 // Write functions.txt if it's currently empty or doesn't exist.
278 auto FunctionsTxtPath
= DirPlusFile(DirPath
, kFunctionsTxt
);
279 if (FileToString(FunctionsTxtPath
).empty()) {
281 Cmd
.addArgument(DFTBinary
);
282 Cmd
.setOutputFile(FunctionsTxtPath
);
288 } // namespace fuzzer