1 //===- FuzzerFork.cpp - run fuzzing in separate subprocesses --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // Spawn and orchestrate separate fuzzing processes.
9 //===----------------------------------------------------------------------===//
11 #include "FuzzerCommand.h"
12 #include "FuzzerFork.h"
14 #include "FuzzerInternal.h"
15 #include "FuzzerMerge.h"
16 #include "FuzzerSHA1.h"
17 #include "FuzzerTracePC.h"
18 #include "FuzzerUtil.h"
22 #include <condition_variable>
33 size_t number_of_executed_units
= 0;
34 size_t peak_rss_mb
= 0;
35 size_t average_exec_per_sec
= 0;
// Opens the log file at LogPath and scans it line by line for final-stat
// lines, i.e. lines that begin with "stat::". The recognised stat names are
// listed in the NameVarPairs table, each paired with the address of the Res
// field that receives the value when the name matches.
38 static Stats
ParseFinalStatsFromLog(const std::string
&LogPath
) {
39 std::ifstream
In(LogPath
);
// Table of {stat name, destination field in Res}.
46 {"stat::number_of_executed_units:", &Res
.number_of_executed_units
},
47 {"stat::peak_rss_mb:", &Res
.peak_rss_mb
},
48 {"stat::average_exec_per_sec:", &Res
.average_exec_per_sec
},
51 while (std::getline(In
, Line
, '\n')) {
// Skip every line that does not start with the "stat::" prefix.
52 if (Line
.find("stat::") != 0) continue;
53 std::istringstream
ISS(Line
);
// Walk the table until the entry whose Name is null; on a name match, store
// Val through the entry's Var pointer.
57 for (size_t i
= 0; NameVarPairs
[i
].Name
; i
++)
58 if (Name
== NameVarPairs
[i
].Name
)
59 *NameVarPairs
[i
].Var
= Val
;
67 std::string CorpusDir
;
68 std::string FeaturesDir
;
70 std::string SeedListPath
;
74 int DftTimeInSeconds
= 0;
82 RemoveFile(SeedListPath
);
83 RmDirRecursive(CorpusDir
);
84 RmDirRecursive(FeaturesDir
);
89 std::vector
<std::string
> Args
;
90 std::vector
<std::string
> CorpusDirs
;
91 std::string MainCorpusDir
;
94 std::string DataFlowBinary
;
95 std::set
<uint32_t> Features
, Cov
;
96 std::set
<std::string
> FilesWithDFT
;
97 std::vector
<std::string
> Files
;
98 std::vector
<std::size_t> FilesSizes
;
100 std::chrono::system_clock::time_point ProcessStartTime
;
105 size_t NumTimeouts
= 0;
107 size_t NumCrashes
= 0;
// Returns the path of the file named "STOP" inside TempDir. CreateNewJob
// passes this path to each job via the "stop_file" flag, and the StopJobs
// lambda in FuzzWithFork writes to it.
112 std::string
StopFile() { return DirPlusFile(TempDir
, "STOP"); }
// Time elapsed since ProcessStartTime, measured with std::chrono::system_clock
// and converted to std::chrono::seconds via duration_cast.
114 size_t secondsSinceProcessStartUp() const {
115 return std::chrono::duration_cast
<std::chrono::seconds
>(
116 std::chrono::system_clock::now() - ProcessStartTime
)
// Creates the FuzzJob for JobId.
//
// The job's command line is derived from Cmd: the "fork", "runs" and
// "collect_data_flow" flags and all corpus directories are removed, the
// per-job flags are added, and the job's own corpus/features directories and
// log file are attached. A random subset of Files is written to a seed list
// and handed to the job via "seed_inputs".
//
// The FuzzJob is allocated with `new` and returned as a raw pointer;
// FuzzWithFork wraps the jobs it pops from the merge queue in a
// std::unique_ptr.
120 FuzzJob
*CreateNewJob(size_t JobId
) {
122 Cmd
.removeFlag("fork");
123 Cmd
.removeFlag("runs");
124 Cmd
.removeFlag("collect_data_flow");
125 for (auto &C
: CorpusDirs
) // Remove all corpora from the args.
126 Cmd
.removeArgument(C
);
127 Cmd
.addFlag("reload", "0"); // working in an isolated dir, no reload.
128 Cmd
.addFlag("print_final_stats", "1");
129 Cmd
.addFlag("print_funcs", "0"); // no need to spend time symbolizing.
// The time limit of a job is min(300, JobId), so it grows with the job id
// until it reaches 300.
130 Cmd
.addFlag("max_total_time", std::to_string(std::min((size_t)300, JobId
)));
131 Cmd
.addFlag("stop_file", StopFile());
// With a data-flow binary configured, point the job at DFTDir and default
// "focus_function" to "auto" unless the flag is already present.
132 if (!DataFlowBinary
.empty()) {
133 Cmd
.addFlag("data_flow_trace", DFTDir
);
134 if (!Cmd
.hasFlag("focus_function"))
135 Cmd
.addFlag("focus_function", "auto");
137 auto Job
= new FuzzJob
;
// Number of seeds to pick: (size_t)sqrt(Files.size() + 2), capped at
// Files.size(). The branch is skipped when that number is 0 (empty Files).
139 if (size_t CorpusSubsetSize
=
140 std::min(Files
.size(), (size_t)sqrt(Files
.size() + 2))) {
141 auto Time1
= std::chrono::system_clock::now();
142 if (Group
) { // whether to group the corpus.
// Grouped selection: the job draws from a window of AverageCorpusSize
// entries starting at StartIndex, which is chosen by (JobId - 1) modulo
// NumCorpuses.
143 size_t AverageCorpusSize
= Files
.size() / NumCorpuses
+ 1;
144 size_t StartIndex
= ((JobId
- 1) % NumCorpuses
) * AverageCorpusSize
;
145 for (size_t i
= 0; i
< CorpusSubsetSize
; i
++) {
146 size_t RandNum
= (*Rand
)(AverageCorpusSize
);
147 size_t Index
= RandNum
+ StartIndex
;
// An index past the end of Files is replaced by a SkewTowardsLast pick over
// the whole of Files.
148 Index
= Index
< Files
.size() ? Index
149 : Rand
->SkewTowardsLast(Files
.size());
150 auto &SF
= Files
[Index
];
// Seeds is a comma-separated list of file paths.
151 Seeds
+= (Seeds
.empty() ? "" : ",") + SF
;
// Ungrouped selection: every pick is a SkewTowardsLast draw over all of Files.
155 for (size_t i
= 0; i
< CorpusSubsetSize
; i
++) {
156 auto &SF
= Files
[Rand
->SkewTowardsLast(Files
.size())];
157 Seeds
+= (Seeds
.empty() ? "" : ",") + SF
;
// The time elapsed between Time1 and Time2 is stored on the job as
// DftTimeInSeconds (asserted to fit in an int).
161 auto Time2
= std::chrono::system_clock::now();
162 auto DftTimeInSeconds
= duration_cast
<seconds
>(Time2
- Time1
).count();
163 assert(DftTimeInSeconds
< std::numeric_limits
<int>::max());
164 Job
->DftTimeInSeconds
= static_cast<int>(DftTimeInSeconds
);
// The seed list is written to Job->SeedListPath and passed to the job as
// "@<path>".
166 if (!Seeds
.empty()) {
168 DirPlusFile(TempDir
, std::to_string(JobId
) + ".seeds");
169 WriteToFile(Seeds
, Job
->SeedListPath
);
170 Cmd
.addFlag("seed_inputs", "@" + Job
->SeedListPath
);
// Per-job paths, all located under TempDir and keyed by JobId.
172 Job
->LogPath
= DirPlusFile(TempDir
, std::to_string(JobId
) + ".log");
173 Job
->CorpusDir
= DirPlusFile(TempDir
, "C" + std::to_string(JobId
));
174 Job
->FeaturesDir
= DirPlusFile(TempDir
, "F" + std::to_string(JobId
));
175 Job
->CFPath
= DirPlusFile(TempDir
, std::to_string(JobId
) + ".merge");
179 Cmd
.addArgument(Job
->CorpusDir
);
180 Cmd
.addFlag("features_dir", Job
->FeaturesDir
);
182 for (auto &D
: {Job
->CorpusDir
, Job
->FeaturesDir
}) {
// The job's stdout and stderr both go to its log file.
187 Cmd
.setOutputFile(Job
->LogPath
);
188 Cmd
.combineOutAndErr();
193 Printf("Job %zd/%p Created: %s\n", JobId
, Job
,
194 Job
->Cmd
.toString().c_str());
195 // Start from very short runs and gradually increase them.
// Folds the results of a finished job into the global state.
//
// Adds the job's executed-unit count (parsed from its log) to NumRuns, picks
// the files in Job->CorpusDir whose feature file contains a feature that is
// not yet in Features, runs CrashResistantMerge over those candidates, copies
// the files the merge selects into MainCorpusDir (named by content hash), and
// extends Features and Cov with the merge results.
199 void RunOneMergeJob(FuzzJob
*Job
) {
200 auto Stats
= ParseFinalStatsFromLog(Job
->LogPath
);
201 NumRuns
+= Stats
.number_of_executed_units
;
203 std::vector
<SizedFile
> TempFiles
, MergeCandidates
;
204 // Read all newly created inputs and their feature sets.
205 // Choose only those inputs that have new features.
206 GetSizedFilesFromDir(Job
->CorpusDir
, &TempFiles
);
207 std::sort(TempFiles
.begin(), TempFiles
.end());
208 for (auto &F
: TempFiles
) {
// The feature file path is the input path with the CorpusDir prefix replaced
// by FeaturesDir.
209 auto FeatureFile
= F
.File
;
210 FeatureFile
.replace(0, Job
->CorpusDir
.size(), Job
->FeaturesDir
);
// The file content is reinterpreted as an array of uint32_t features; its
// size must be a multiple of sizeof(uint32_t).
211 auto FeatureBytes
= FileToVector(FeatureFile
, 0, false);
212 assert((FeatureBytes
.size() % sizeof(uint32_t)) == 0);
213 std::vector
<uint32_t> NewFeatures(FeatureBytes
.size() / sizeof(uint32_t));
214 memcpy(NewFeatures
.data(), FeatureBytes
.data(), FeatureBytes
.size());
215 for (auto Ft
: NewFeatures
) {
216 if (!Features
.count(Ft
)) {
217 MergeCandidates
.push_back(F
);
222 // if (!FilesToAdd.empty() || Job->ExitCode != 0)
// One status line per merged job.
223 Printf("#%zd: cov: %zd ft: %zd corp: %zd exec/s: %zd "
224 "oom/timeout/crash: %zd/%zd/%zd time: %zds job: %zd dft_time: %d\n",
225 NumRuns
, Cov
.size(), Features
.size(), Files
.size(),
226 Stats
.average_exec_per_sec
, NumOOMs
, NumTimeouts
, NumCrashes
,
227 secondsSinceProcessStartUp(), Job
->JobId
, Job
->DftTimeInSeconds
);
// Nothing to merge when no input brought a feature that is not in Features.
229 if (MergeCandidates
.empty()) return;
231 std::vector
<std::string
> FilesToAdd
;
232 std::set
<uint32_t> NewFeatures
, NewCov
;
// True when the job's command carries set_cover_merge with the value "1"
// (compare() returns 0 on equality).
233 bool IsSetCoverMerge
=
234 !Job
->Cmd
.getFlagValue("set_cover_merge").compare("1");
235 CrashResistantMerge(Args
, {}, MergeCandidates
, &FilesToAdd
, Features
,
236 &NewFeatures
, Cov
, &NewCov
, Job
->CFPath
, false,
// Copy each selected input into MainCorpusDir under the hash of its content
// and register the new path in Files.
238 for (auto &Path
: FilesToAdd
) {
239 auto U
= FileToVector(Path
);
240 auto NewPath
= DirPlusFile(MainCorpusDir
, Hash(U
));
241 WriteToFile(U
, NewPath
);
242 if (Group
) { // Insert the queue according to the size of the seed.
243 size_t UnitSize
= U
.size();
// upper_bound keeps FilesSizes ordered; Files is updated at the same index so
// that both vectors stay parallel.
245 std::upper_bound(FilesSizes
.begin(), FilesSizes
.end(), UnitSize
) -
247 FilesSizes
.insert(FilesSizes
.begin() + Idx
, UnitSize
);
248 Files
.insert(Files
.begin() + Idx
, NewPath
);
250 Files
.push_back(NewPath
);
253 Features
.insert(NewFeatures
.begin(), NewFeatures
.end());
254 Cov
.insert(NewCov
.begin(), NewCov
.end());
// Print a NEW_FUNC line for every new coverage index whose PC table entry is
// a function entry.
255 for (auto Idx
: NewCov
)
256 if (auto *TE
= TPC
.PCTableEntryByIdx(Idx
))
257 if (TPC
.PcIsFuncEntry(TE
))
258 PrintPC(" NEW_FUNC: %p %F %L\n", "",
259 TPC
.GetNextInstructionPc(TE
->PC
));
// Prepares the data-flow-trace collection command for InputPath.
//
// Does nothing when DataFlowBinary is empty or when InputPath is already
// recorded in FilesWithDFT (each path is handled at most once). Otherwise the
// command Cmd is stripped of the "fork" and "runs" flags and of all corpus
// directories, gets "data_flow_trace" set to DFTDir and InputPath as an
// argument, and has its combined stdout/stderr redirected to "dft.log" in
// TempDir.
262 void CollectDFT(const std::string
&InputPath
) {
263 if (DataFlowBinary
.empty()) return;
// insert().second is false when the path was already in the set.
264 if (!FilesWithDFT
.insert(InputPath
).second
) return;
266 Cmd
.removeFlag("fork");
267 Cmd
.removeFlag("runs");
268 Cmd
.addFlag("data_flow_trace", DFTDir
);
269 Cmd
.addArgument(InputPath
);
270 for (auto &C
: CorpusDirs
) // Remove all corpora from the args.
271 Cmd
.removeArgument(C
);
272 Cmd
.setOutputFile(DirPlusFile(TempDir
, "dft.log"));
273 Cmd
.combineOutAndErr();
274 // Printf("CollectDFT: %s\n", Cmd.toString().c_str());
281 std::queue
<FuzzJob
*> Qu
;
283 std::condition_variable Cv
;
285 void Push(FuzzJob
*Job
) {
287 std::lock_guard
<std::mutex
> Lock(Mu
);
293 std::unique_lock
<std::mutex
> Lk(Mu
);
294 // std::lock_guard<std::mutex> Lock(Mu);
295 Cv
.wait(Lk
, [&]{return !Qu
.empty();});
297 auto Job
= Qu
.front();
// Thread body used by FuzzWithFork: keeps popping jobs from FuzzQ until Pop()
// yields a null job, executes each job's command and stores the command's
// exit code in Job->ExitCode.
// NOTE(review): MergeQ is presumably where finished jobs are handed over for
// merging — confirm against the rest of the loop body.
303 void WorkerThread(JobQueue
*FuzzQ
, JobQueue
*MergeQ
) {
304 while (auto Job
= FuzzQ
->Pop()) {
305 // Printf("WorkerThread: job %p\n", Job);
306 Job
->ExitCode
= ExecuteCommand(Job
->Cmd
);
311 // This is just a skeleton of an experimental -fork=1 feature.
//
// Entry point of fork mode: fuzzes by running NumJobs worker threads, each of
// which executes fuzzing jobs as separate child processes, while this thread
// merges the results of finished jobs into the main corpus.
//
//   Rand       - random source for the environment.
//   Options    - fuzzing options; read here: Verbosity, CollectDataFlow,
//                ForkCorpusGroups, KeepSeed, the Ignore* switches with their
//                exit codes, InterruptExitCode, MaxTotalTimeSec and
//                MaxNumberOfRuns.
//   Args       - command-line arguments of the process.
//   CorpusDirs - corpus directories; the first one becomes the main corpus
//                directory. When empty, a directory "C" inside the temporary
//                directory is created and used instead.
//   NumJobs    - number of worker threads.
312 void FuzzWithFork(Random
&Rand
, const FuzzingOptions
&Options
,
313 const std::vector
<std::string
> &Args
,
314 const std::vector
<std::string
> &CorpusDirs
, int NumJobs
) {
315 Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs
);
319 Env
.CorpusDirs
= CorpusDirs
;
321 Env
.Verbosity
= Options
.Verbosity
;
322 Env
.ProcessStartTime
= std::chrono::system_clock::now();
323 Env
.DataFlowBinary
= Options
.CollectDataFlow
;
324 Env
.Group
= Options
.ForkCorpusGroups
;
// Collect the seed inputs from all corpus directories, in sorted order.
326 std::vector
<SizedFile
> SeedFiles
;
327 for (auto &Dir
: CorpusDirs
)
328 GetSizedFilesFromDir(Dir
, &SeedFiles
);
329 std::sort(SeedFiles
.begin(), SeedFiles
.end());
330 Env
.TempDir
= TempPath("FuzzWithFork", ".dir");
331 Env
.DFTDir
= DirPlusFile(Env
.TempDir
, "DFT");
332 RmDirRecursive(Env
.TempDir
); // in case there is a leftover from old runs.
337 if (CorpusDirs
.empty())
338 MkDir(Env
.MainCorpusDir
= DirPlusFile(Env
.TempDir
, "C"));
340 Env
.MainCorpusDir
= CorpusDirs
[0];
// With KeepSeed every seed file is used as-is; otherwise the seeds go through
// a merge that fills Env.Files, Env.Features and Env.Cov.
342 if (Options
.KeepSeed
) {
343 for (auto &File
: SeedFiles
)
344 Env
.Files
.push_back(File
.File
);
346 auto CFPath
= DirPlusFile(Env
.TempDir
, "merge.txt");
347 std::set
<uint32_t> NewFeatures
, NewCov
;
348 CrashResistantMerge(Env
.Args
, {}, SeedFiles
, &Env
.Files
, Env
.Features
,
349 &NewFeatures
, Env
.Cov
, &NewCov
, CFPath
,
350 /*Verbose=*/false, /*IsSetCoverMerge=*/false);
351 Env
.Features
.insert(NewFeatures
.begin(), NewFeatures
.end());
// Coverage must be taken from NewCov, the coverage output of the merge above.
// Inserting NewFeatures here would put feature ids into the coverage set.
// RunOneMergeJob updates Cov from NewCov in the same way.
352 Env
.Cov
.insert(NewCov
.begin(), NewCov
.end());
// FilesSizes runs parallel to Files.
357 for (auto &path
: Env
.Files
)
358 Env
.FilesSizes
.push_back(FileSize(path
));
361 Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs
,
362 Env
.Files
.size(), Env
.TempDir
.c_str());
366 JobQueue FuzzQ
, MergeQ
;
// Shutdown helper: pushes null jobs (one round per worker) and writes the
// stop file that the child processes were given via the "stop_file" flag.
368 auto StopJobs
= [&]() {
369 for (int i
= 0; i
< NumJobs
; i
++)
371 MergeQ
.Push(nullptr);
372 WriteToFile(Unit({1}), Env
.StopFile());
// In grouped mode the corpus is re-merged once JobExecuted reaches MergeCycle.
375 size_t MergeCycle
= 20;
376 size_t JobExecuted
= 0;
// Start one worker thread per job slot and give each an initial job.
378 std::vector
<std::thread
> Threads
;
379 for (int t
= 0; t
< NumJobs
; t
++) {
380 Threads
.push_back(std::thread(WorkerThread
, &FuzzQ
, &MergeQ
));
381 FuzzQ
.Push(Env
.CreateNewJob(JobId
++));
// Main loop: take ownership of each finished job popped from the merge queue.
385 std::unique_ptr
<FuzzJob
> Job(MergeQ
.Pop());
388 ExitCode
= Job
->ExitCode
;
389 if (ExitCode
== Options
.InterruptExitCode
) {
390 Printf("==%lu== libFuzzer: a child was interrupted; exiting\n", GetPid());
394 Fuzzer::MaybeExitGracefully();
396 Env
.RunOneMergeJob(Job
.get());
398 // Merge the corpus.
400 if (Env
.Group
&& JobExecuted
>= MergeCycle
) {
401 std::vector
<SizedFile
> CurrentSeedFiles
;
402 for (auto &Dir
: CorpusDirs
)
403 GetSizedFilesFromDir(Dir
, &CurrentSeedFiles
);
404 std::sort(CurrentSeedFiles
.begin(), CurrentSeedFiles
.end());
406 auto CFPath
= DirPlusFile(Env
.TempDir
, "merge.txt");
407 std::set
<uint32_t> TmpNewFeatures
, TmpNewCov
;
408 std::set
<uint32_t> TmpFeatures
, TmpCov
;
// This merge starts from empty feature/coverage sets (TmpFeatures, TmpCov);
// FilesSizes is rebuilt from Env.Files afterwards.
410 Env
.FilesSizes
.clear();
411 CrashResistantMerge(Env
.Args
, {}, CurrentSeedFiles
, &Env
.Files
,
412 TmpFeatures
, &TmpNewFeatures
, TmpCov
, &TmpNewCov
,
413 CFPath
, /*Verbose=*/false, /*IsSetCoverMerge=*/false);
414 for (auto &path
: Env
.Files
)
415 Env
.FilesSizes
.push_back(FileSize(path
));
421 // Since the number of corpus seeds will gradually increase, in order to
422 // control the number in each group to be about three times the number of
423 // seeds selected each time, the number of groups is dynamically adjusted.
424 if (Env
.Files
.size() < 2000)
425 Env
.NumCorpuses
= 12;
426 else if (Env
.Files
.size() < 6000)
427 Env
.NumCorpuses
= 20;
428 else if (Env
.Files
.size() < 12000)
429 Env
.NumCorpuses
= 32;
430 else if (Env
.Files
.size() < 16000)
431 Env
.NumCorpuses
= 40;
432 else if (Env
.Files
.size() < 24000)
433 Env
.NumCorpuses
= 60;
435 Env
.NumCorpuses
= 80;
437 // Continue if our crash is one of the ignored ones.
438 if (Options
.IgnoreTimeouts
&& ExitCode
== Options
.TimeoutExitCode
)
440 else if (Options
.IgnoreOOMs
&& ExitCode
== Options
.OOMExitCode
)
442 else if (ExitCode
!= 0) {
// For an ignored crash only the log lines containing "ERROR:" or
// "runtime error:" are echoed.
444 if (Options
.IgnoreCrashes
) {
445 std::ifstream
In(Job
->LogPath
);
447 while (std::getline(In
, Line
, '\n'))
448 if (Line
.find("ERROR:") != Line
.npos
||
449 Line
.find("runtime error:") != Line
.npos
)
450 Printf("%s\n", Line
.c_str());
452 // And exit if we don't ignore this crash.
453 Printf("INFO: log from the inner process:\n%s",
454 FileToString(Job
->LogPath
).c_str());
460 // Stop if we are over the time budget.
461 // This is not precise, since other threads are still running
462 // and we will wait while joining them.
463 // We also don't stop instantly: other jobs need to finish.
464 if (Options
.MaxTotalTimeSec
> 0 &&
465 Env
.secondsSinceProcessStartUp() >= (size_t)Options
.MaxTotalTimeSec
) {
466 Printf("INFO: fuzzed for %zd seconds, wrapping up soon\n",
467 Env
.secondsSinceProcessStartUp());
471 if (Env
.NumRuns
>= Options
.MaxNumberOfRuns
) {
472 Printf("INFO: fuzzed for %zd iterations, wrapping up soon\n",
// Hand a fresh job to the workers.
478 FuzzQ
.Push(Env
.CreateNewJob(JobId
++));
481 for (auto &T
: Threads
)
484 // The workers have terminated. Don't try to remove the directory before they
485 // terminate to avoid a race condition preventing cleanup on Windows.
486 RmDirRecursive(Env
.TempDir
);
488 // Use the exit code from the last child process.
489 Printf("INFO: exiting: %d time: %zds\n", ExitCode
,
490 Env
.secondsSinceProcessStartUp());
494 } // namespace fuzzer