1 /*===- DataFlow.cpp - a standalone DataFlow tracer -------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // An experimental data-flow tracer for fuzz targets.
9 // It is based on DFSan and SanitizerCoverage.
10 // https://clang.llvm.org/docs/DataFlowSanitizer.html
11 // https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow
13 // It executes the fuzz target on the given input while monitoring the
14 // data flow for every instrumented comparison instruction.
16 // The output shows which functions depend on which bytes of the input,
17 // and also provides basic-block coverage for every input.
20 // 1. Compile this file (DataFlow.cpp) with -fsanitize=dataflow and -O2.
21 // 2. Compile DataFlowCallbacks.cpp with -O2 -fPIC.
22 // 3. Build the fuzz target with -g -fsanitize=dataflow
23 // -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp
24 // 4. Link those together with -fsanitize=dataflow
26 // -fsanitize-coverage=trace-cmp inserts callbacks around every comparison
27 // instruction, DFSan modifies the calls to pass the data flow labels.
28 // The callbacks update the data flow label for the current function.
29 // See e.g. __dfsw___sanitizer_cov_trace_cmp1 below.
31 // -fsanitize-coverage=trace-pc-guard,pc-table,bb instruments function
32 // entries so that the comparison callback knows that current function.
33 // -fsanitize-coverage=...,bb also allows to collect basic block coverage.
37 // # Collect data flow and coverage for INPUT_FILE
38 // # write to OUTPUT_FILE (default: stdout)
39 // export DFSAN_OPTIONS=warn_unimplemented=0
40 // ./a.out INPUT_FILE [OUTPUT_FILE]
42 // # Print all instrumented functions. llvm-symbolizer must be present in PATH
52 // "FN xxxxxxxxxx": tells what bytes of the input does the function N depend on.
53 // "CN X Y Z T": tells that a function N has basic blocks X, Y, and Z covered
54 // in addition to the function's entry block, out of T total instrumented
57 //===----------------------------------------------------------------------===*/
65 #include <execinfo.h> // backtrace_symbols_fd
70 extern int LLVMFuzzerTestOneInput(const unsigned char *Data
, size_t Size
);
71 __attribute__((weak
)) extern int LLVMFuzzerInitialize(int *argc
, char ***argv
);
75 static size_t InputLen
;
76 static size_t NumIterations
;
77 static dfsan_label
**FuncLabelsPerIter
; // NumIterations x NumFuncs;
79 static inline bool BlockIsEntry(size_t BlockIdx
) {
80 return __dft
.PCsBeg
[BlockIdx
* 2 + 1] & PCFLAG_FUNC_ENTRY
;
83 const int kNumLabels
= 8;
85 // Prints all instrumented functions.
86 static int PrintFunctions() {
87 // We don't have the symbolizer integrated with dfsan yet.
88 // So use backtrace_symbols_fd and pipe it through llvm-symbolizer.
89 // TODO(kcc): this is pretty ugly and may break in lots of ways.
90 // We'll need to make a proper in-process symbolizer work with DFSan.
91 FILE *Pipe
= popen("sed 's/(+/ /g; s/).*//g' "
94 "| sed 's/\\.dfsan//g' "
97 for (size_t I
= 0; I
< __dft
.NumGuards
; I
++) {
98 uintptr_t PC
= __dft
.PCsBeg
[I
* 2];
99 if (!BlockIsEntry(I
)) continue;
100 void *const Buf
[1] = {(void*)PC
};
101 backtrace_symbols_fd(Buf
, 1, fileno(Pipe
));
107 static void PrintBinary(FILE *Out
, dfsan_label L
, size_t Len
) {
108 char buf
[kNumLabels
+ 1];
109 assert(Len
<= kNumLabels
);
110 for (int i
= 0; i
< kNumLabels
; i
++)
111 buf
[i
] = (L
& (1 << i
)) ? '1' : '0';
113 fprintf(Out
, "%s", buf
);
116 static void PrintDataFlow(FILE *Out
) {
117 for (size_t Func
= 0; Func
< __dft
.NumFuncs
; Func
++) {
119 for (size_t Iter
= 0; Iter
< NumIterations
; Iter
++)
120 if (FuncLabelsPerIter
[Iter
][Func
])
124 fprintf(Out
, "F%zd ", Func
);
125 size_t LenOfLastIteration
= kNumLabels
;
126 if (auto Tail
= InputLen
% kNumLabels
)
127 LenOfLastIteration
= Tail
;
128 for (size_t Iter
= 0; Iter
< NumIterations
; Iter
++)
129 PrintBinary(Out
, FuncLabelsPerIter
[Iter
][Func
],
130 Iter
== NumIterations
- 1 ? LenOfLastIteration
: kNumLabels
);
135 static void PrintCoverage(FILE *Out
) {
136 ssize_t CurrentFuncGuard
= -1;
137 ssize_t CurrentFuncNum
= -1;
138 ssize_t NumBlocksInCurrentFunc
= -1;
139 for (size_t FuncBeg
= 0; FuncBeg
< __dft
.NumGuards
;) {
141 assert(BlockIsEntry(FuncBeg
));
142 size_t FuncEnd
= FuncBeg
+ 1;
143 for (; FuncEnd
< __dft
.NumGuards
&& !BlockIsEntry(FuncEnd
); FuncEnd
++)
145 if (__dft
.BBExecuted
[FuncBeg
]) {
146 fprintf(Out
, "C%zd", CurrentFuncNum
);
147 for (size_t I
= FuncBeg
+ 1; I
< FuncEnd
; I
++)
148 if (__dft
.BBExecuted
[I
])
149 fprintf(Out
, " %zd", I
- FuncBeg
);
150 fprintf(Out
, " %zd\n", FuncEnd
- FuncBeg
);
156 int main(int argc
, char **argv
) {
157 if (LLVMFuzzerInitialize
)
158 LLVMFuzzerInitialize(&argc
, &argv
);
160 return PrintFunctions();
161 assert(argc
== 2 || argc
== 3);
163 const char *Input
= argv
[1];
164 fprintf(stderr
, "INFO: reading '%s'\n", Input
);
165 FILE *In
= fopen(Input
, "r");
167 fseek(In
, 0, SEEK_END
);
168 InputLen
= ftell(In
);
169 fseek(In
, 0, SEEK_SET
);
170 unsigned char *Buf
= (unsigned char*)malloc(InputLen
);
171 size_t NumBytesRead
= fread(Buf
, 1, InputLen
, In
);
172 assert(NumBytesRead
== InputLen
);
175 NumIterations
= (NumBytesRead
+ kNumLabels
- 1) / kNumLabels
;
177 (dfsan_label
**)calloc(NumIterations
, sizeof(dfsan_label
*));
178 for (size_t Iter
= 0; Iter
< NumIterations
; Iter
++)
179 FuncLabelsPerIter
[Iter
] =
180 (dfsan_label
*)calloc(__dft
.NumFuncs
, sizeof(dfsan_label
));
182 for (size_t Iter
= 0; Iter
< NumIterations
; Iter
++) {
183 fprintf(stderr
, "INFO: running '%s' %zd/%zd\n", Input
, Iter
, NumIterations
);
185 dfsan_set_label(0, Buf
, InputLen
);
186 __dft
.FuncLabels
= FuncLabelsPerIter
[Iter
];
188 size_t BaseIdx
= Iter
* kNumLabels
;
189 size_t LastIdx
= BaseIdx
+ kNumLabels
< NumBytesRead
? BaseIdx
+ kNumLabels
191 assert(BaseIdx
< LastIdx
);
192 for (size_t Idx
= BaseIdx
; Idx
< LastIdx
; Idx
++)
193 dfsan_set_label(1 << (Idx
- BaseIdx
), Buf
+ Idx
, 1);
194 LLVMFuzzerTestOneInput(Buf
, InputLen
);
198 bool OutIsStdout
= argc
== 2;
199 fprintf(stderr
, "INFO: writing dataflow to %s\n",
200 OutIsStdout
? "<stdout>" : argv
[2]);
201 FILE *Out
= OutIsStdout
? stdout
: fopen(argv
[2], "w");
204 if (!OutIsStdout
) fclose(Out
);