1 //===- FuzzerDataFlowTrace.cpp - DataFlowTrace ---*- C++ -* ===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // fuzzer::DataFlowTrace
9 //===----------------------------------------------------------------------===//
11 #include "FuzzerDataFlowTrace.h"
13 #include "FuzzerCommand.h"
15 #include "FuzzerRandom.h"
16 #include "FuzzerSHA1.h"
17 #include "FuzzerUtil.h"
25 #include <unordered_map>
26 #include <unordered_set>
30 static const char *kFunctionsTxt
= "functions.txt";
32 bool BlockCoverage::AppendCoverage(const std::string
&S
) {
33 std::stringstream
SS(S
);
34 return AppendCoverage(SS
);
37 // Coverage lines have this form:
39 // where N is the number of the function, T is the total number of instrumented
40 // BBs, and X,Y,Z, if present, are the indecies of covered BB.
41 // BB #0, which is the entry block, is not explicitly listed.
42 bool BlockCoverage::AppendCoverage(std::istream
&IN
) {
44 while (std::getline(IN
, L
, '\n')) {
47 std::stringstream
SS(L
.c_str() + 1);
48 size_t FunctionId
= 0;
51 FunctionsWithDFT
.insert(FunctionId
);
54 if (L
[0] != 'C') continue;
55 Vector
<uint32_t> CoveredBlocks
;
60 CoveredBlocks
.push_back(BB
);
62 if (CoveredBlocks
.empty()) return false;
63 uint32_t NumBlocks
= CoveredBlocks
.back();
64 CoveredBlocks
.pop_back();
65 for (auto BB
: CoveredBlocks
)
66 if (BB
>= NumBlocks
) return false;
67 auto It
= Functions
.find(FunctionId
);
70 ? Functions
.insert({FunctionId
, Vector
<uint32_t>(NumBlocks
)})
74 if (Counters
.size() != NumBlocks
) return false; // wrong number of blocks.
77 for (auto BB
: CoveredBlocks
)
83 // Assign weights to each function.
84 // General principles:
85 // * any uncovered function gets weight 0.
86 // * a function with lots of uncovered blocks gets bigger weight.
87 // * a function with a less frequently executed code gets bigger weight.
88 Vector
<double> BlockCoverage::FunctionWeights(size_t NumFunctions
) const {
89 Vector
<double> Res(NumFunctions
);
90 for (auto It
: Functions
) {
91 auto FunctionID
= It
.first
;
92 auto Counters
= It
.second
;
93 assert(FunctionID
< NumFunctions
);
94 auto &Weight
= Res
[FunctionID
];
95 // Give higher weight if the function has a DFT.
96 Weight
= FunctionsWithDFT
.count(FunctionID
) ? 1000. : 1;
97 // Give higher weight to functions with less frequently seen basic blocks.
98 Weight
/= SmallestNonZeroCounter(Counters
);
99 // Give higher weight to functions with the most uncovered basic blocks.
100 Weight
*= NumberOfUncoveredBlocks(Counters
) + 1;
105 void DataFlowTrace::ReadCoverage(const std::string
&DirPath
) {
106 Vector
<SizedFile
> Files
;
107 GetSizedFilesFromDir(DirPath
, &Files
);
108 for (auto &SF
: Files
) {
109 auto Name
= Basename(SF
.File
);
110 if (Name
== kFunctionsTxt
) continue;
111 if (!CorporaHashes
.count(Name
)) continue;
112 std::ifstream
IF(SF
.File
);
113 Coverage
.AppendCoverage(IF
);
117 static void DFTStringAppendToVector(Vector
<uint8_t> *DFT
,
118 const std::string
&DFTString
) {
119 assert(DFT
->size() == DFTString
.size());
120 for (size_t I
= 0, Len
= DFT
->size(); I
< Len
; I
++)
121 (*DFT
)[I
] = DFTString
[I
] == '1';
124 // converts a string of '0' and '1' into a Vector<uint8_t>
125 static Vector
<uint8_t> DFTStringToVector(const std::string
&DFTString
) {
126 Vector
<uint8_t> DFT(DFTString
.size());
127 DFTStringAppendToVector(&DFT
, DFTString
);
131 static bool ParseError(const char *Err
, const std::string
&Line
) {
132 Printf("DataFlowTrace: parse error: %s: Line: %s\n", Err
, Line
.c_str());
136 // TODO(metzman): replace std::string with std::string_view for
137 // better performance. Need to figure our how to use string_view on Windows.
138 static bool ParseDFTLine(const std::string
&Line
, size_t *FunctionNum
,
139 std::string
*DFTString
) {
140 if (!Line
.empty() && Line
[0] != 'F')
141 return false; // Ignore coverage.
142 size_t SpacePos
= Line
.find(' ');
143 if (SpacePos
== std::string::npos
)
144 return ParseError("no space in the trace line", Line
);
145 if (Line
.empty() || Line
[0] != 'F')
146 return ParseError("the trace line doesn't start with 'F'", Line
);
147 *FunctionNum
= std::atol(Line
.c_str() + 1);
148 const char *Beg
= Line
.c_str() + SpacePos
+ 1;
149 const char *End
= Line
.c_str() + Line
.size();
151 size_t Len
= End
- Beg
;
152 for (size_t I
= 0; I
< Len
; I
++) {
153 if (Beg
[I
] != '0' && Beg
[I
] != '1')
154 return ParseError("the trace should contain only 0 or 1", Line
);
160 bool DataFlowTrace::Init(const std::string
&DirPath
, std::string
*FocusFunction
,
161 Vector
<SizedFile
> &CorporaFiles
, Random
&Rand
) {
162 if (DirPath
.empty()) return false;
163 Printf("INFO: DataFlowTrace: reading from '%s'\n", DirPath
.c_str());
164 Vector
<SizedFile
> Files
;
165 GetSizedFilesFromDir(DirPath
, &Files
);
167 size_t FocusFuncIdx
= SIZE_MAX
;
168 Vector
<std::string
> FunctionNames
;
170 // Collect the hashes of the corpus files.
171 for (auto &SF
: CorporaFiles
)
172 CorporaHashes
.insert(Hash(FileToVector(SF
.File
)));
174 // Read functions.txt
175 std::ifstream
IF(DirPlusFile(DirPath
, kFunctionsTxt
));
176 size_t NumFunctions
= 0;
177 while (std::getline(IF
, L
, '\n')) {
178 FunctionNames
.push_back(L
);
180 if (*FocusFunction
== L
)
181 FocusFuncIdx
= NumFunctions
- 1;
186 if (*FocusFunction
== "auto") {
187 // AUTOFOCUS works like this:
188 // * reads the coverage data from the DFT files.
189 // * assigns weights to functions based on coverage.
190 // * chooses a random function according to the weights.
191 ReadCoverage(DirPath
);
192 auto Weights
= Coverage
.FunctionWeights(NumFunctions
);
193 Vector
<double> Intervals(NumFunctions
+ 1);
194 std::iota(Intervals
.begin(), Intervals
.end(), 0);
195 auto Distribution
= std::piecewise_constant_distribution
<double>(
196 Intervals
.begin(), Intervals
.end(), Weights
.begin());
197 FocusFuncIdx
= static_cast<size_t>(Distribution(Rand
));
198 *FocusFunction
= FunctionNames
[FocusFuncIdx
];
199 assert(FocusFuncIdx
< NumFunctions
);
200 Printf("INFO: AUTOFOCUS: %zd %s\n", FocusFuncIdx
,
201 FunctionNames
[FocusFuncIdx
].c_str());
202 for (size_t i
= 0; i
< NumFunctions
; i
++) {
203 if (!Weights
[i
]) continue;
204 Printf(" [%zd] W %g\tBB-tot %u\tBB-cov %u\tEntryFreq %u:\t%s\n", i
,
205 Weights
[i
], Coverage
.GetNumberOfBlocks(i
),
206 Coverage
.GetNumberOfCoveredBlocks(i
), Coverage
.GetCounter(i
, 0),
207 FunctionNames
[i
].c_str());
211 if (!NumFunctions
|| FocusFuncIdx
== SIZE_MAX
|| Files
.size() <= 1)
215 size_t NumTraceFiles
= 0;
216 size_t NumTracesWithFocusFunction
= 0;
217 for (auto &SF
: Files
) {
218 auto Name
= Basename(SF
.File
);
219 if (Name
== kFunctionsTxt
) continue;
220 if (!CorporaHashes
.count(Name
)) continue; // not in the corpus.
222 // Printf("=== %s\n", Name.c_str());
223 std::ifstream
IF(SF
.File
);
224 while (std::getline(IF
, L
, '\n')) {
225 size_t FunctionNum
= 0;
226 std::string DFTString
;
227 if (ParseDFTLine(L
, &FunctionNum
, &DFTString
) &&
228 FunctionNum
== FocusFuncIdx
) {
229 NumTracesWithFocusFunction
++;
231 if (FunctionNum
>= NumFunctions
)
232 return ParseError("N is greater than the number of functions", L
);
233 Traces
[Name
] = DFTStringToVector(DFTString
);
234 // Print just a few small traces.
235 if (NumTracesWithFocusFunction
<= 3 && DFTString
.size() <= 16)
236 Printf("%s => |%s|\n", Name
.c_str(), std::string(DFTString
).c_str());
237 break; // No need to parse the following lines.
241 Printf("INFO: DataFlowTrace: %zd trace files, %zd functions, "
242 "%zd traces with focus function\n",
243 NumTraceFiles
, NumFunctions
, NumTracesWithFocusFunction
);
244 return NumTraceFiles
> 0;
247 int CollectDataFlow(const std::string
&DFTBinary
, const std::string
&DirPath
,
248 const Vector
<SizedFile
> &CorporaFiles
) {
249 Printf("INFO: collecting data flow: bin: %s dir: %s files: %zd\n",
250 DFTBinary
.c_str(), DirPath
.c_str(), CorporaFiles
.size());
251 if (CorporaFiles
.empty()) {
252 Printf("ERROR: can't collect data flow without corpus provided.");
256 static char DFSanEnv
[] = "DFSAN_OPTIONS=warn_unimplemented=0";
259 for (auto &F
: CorporaFiles
) {
260 // For every input F we need to collect the data flow and the coverage.
261 // Data flow collection may fail if we request too many DFSan tags at once.
262 // So, we start from requesting all tags in range [0,Size) and if that fails
263 // we then request tags in [0,Size/2) and [Size/2, Size), and so on.
264 // Function number => DFT.
265 auto OutPath
= DirPlusFile(DirPath
, Hash(FileToVector(F
.File
)));
266 std::unordered_map
<size_t, Vector
<uint8_t>> DFTMap
;
267 std::unordered_set
<std::string
> Cov
;
269 Cmd
.addArgument(DFTBinary
);
270 Cmd
.addArgument(F
.File
);
271 Cmd
.addArgument(OutPath
);
272 Printf("CMD: %s\n", Cmd
.toString().c_str());
275 // Write functions.txt if it's currently empty or doesn't exist.
276 auto FunctionsTxtPath
= DirPlusFile(DirPath
, kFunctionsTxt
);
277 if (FileToString(FunctionsTxtPath
).empty()) {
279 Cmd
.addArgument(DFTBinary
);
280 Cmd
.setOutputFile(FunctionsTxtPath
);
286 } // namespace fuzzer