1 /*===- DataFlow.cpp - a standalone DataFlow tracer -------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // An experimental data-flow tracer for fuzz targets.
9 // It is based on DFSan and SanitizerCoverage.
10 // https://clang.llvm.org/docs/DataFlowSanitizer.html
11 // https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow
13 // It executes the fuzz target on the given input while monitoring the
14 // data flow for every instrumented comparison instruction.
16 // The output shows which functions depend on which bytes of the input,
17 // and also provides basic-block coverage for every input.
20 // 1. Compile this file (DataFlow.cpp) with -fsanitize=dataflow -mllvm
21 // -dfsan-fast-16-labels and -O2.
22 // 2. Compile DataFlowCallbacks.cpp with -O2 -fPIC.
23 // 3. Build the fuzz target with -g -fsanitize=dataflow
24 // -mllvm -dfsan-fast-16-labels
25 // -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp
26 // 4. Link those together with -fsanitize=dataflow
28 // -fsanitize-coverage=trace-cmp inserts callbacks around every comparison
29 // instruction, DFSan modifies the calls to pass the data flow labels.
30 // The callbacks update the data flow label for the current function.
31 // See e.g. __dfsw___sanitizer_cov_trace_cmp1 below.
//  -fsanitize-coverage=trace-pc-guard,pc-table,bb instruments function
//  entries so that the comparison callback knows the current function.
//  -fsanitize-coverage=...,bb also makes it possible to collect basic-block
//  coverage.
// Run:
//   # Collect data flow and coverage for INPUT_FILE and
//   # write them to OUTPUT_FILE (default: stdout)
//   export DFSAN_OPTIONS=warn_unimplemented=0
//   ./a.out INPUT_FILE [OUTPUT_FILE]
//
//   # Print all instrumented functions. llvm-symbolizer must be present in PATH
//   ./a.out
// "FN xxxxxxxxxx": tells which bytes of the input function N depends on.
// "CN X Y Z T": tells that function N has basic blocks X, Y, and Z covered
//    in addition to the function's entry block, out of T total instrumented
//    blocks.
59 //===----------------------------------------------------------------------===*/
67 #include <execinfo.h> // backtrace_symbols_fd
// Entry points supplied by the fuzz target. They are declared with C linkage
// so that this C++ translation unit refers to the unmangled symbol names a
// libFuzzer-style target exports. The guard keeps the declarations valid if
// the file is ever compiled as C.
#ifdef __cplusplus
extern "C" {
#endif
// Runs the target once on the Size bytes at Data.
extern int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size);
// Optional one-time initializer. It is weak so that the symbol is null when
// the target does not define it; main() tests it before calling.
__attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv);
#ifdef __cplusplus
} // extern "C"
#endif
77 static size_t InputLen
;
78 static size_t NumIterations
;
79 static dfsan_label
**FuncLabelsPerIter
; // NumIterations x NumFuncs;
81 static inline bool BlockIsEntry(size_t BlockIdx
) {
82 return __dft
.PCsBeg
[BlockIdx
* 2 + 1] & PCFLAG_FUNC_ENTRY
;
// Number of input bytes tracked per run of the fuzz target. main() gives each
// byte of the current group its own one-bit label (1 << k), so an input of N
// bytes needs ceil(N / kNumLabels) runs.
const int kNumLabels = 16;
87 // Prints all instrumented functions.
88 static int PrintFunctions() {
89 // We don't have the symbolizer integrated with dfsan yet.
90 // So use backtrace_symbols_fd and pipe it through llvm-symbolizer.
91 // TODO(kcc): this is pretty ugly and may break in lots of ways.
92 // We'll need to make a proper in-process symbolizer work with DFSan.
93 FILE *Pipe
= popen("sed 's/(+/ /g; s/).*//g' "
96 "| sed 's/dfs\\$//g' "
99 for (size_t I
= 0; I
< __dft
.NumGuards
; I
++) {
100 uintptr_t PC
= __dft
.PCsBeg
[I
* 2];
101 if (!BlockIsEntry(I
)) continue;
102 void *const Buf
[1] = {(void*)PC
};
103 backtrace_symbols_fd(Buf
, 1, fileno(Pipe
));
109 static void PrintBinary(FILE *Out
, dfsan_label L
, size_t Len
) {
110 char buf
[kNumLabels
+ 1];
111 assert(Len
<= kNumLabels
);
112 for (int i
= 0; i
< kNumLabels
; i
++)
113 buf
[i
] = (L
& (1 << i
)) ? '1' : '0';
115 fprintf(Out
, "%s", buf
);
118 static void PrintDataFlow(FILE *Out
) {
119 for (size_t Func
= 0; Func
< __dft
.NumFuncs
; Func
++) {
121 for (size_t Iter
= 0; Iter
< NumIterations
; Iter
++)
122 if (FuncLabelsPerIter
[Iter
][Func
])
126 fprintf(Out
, "F%zd ", Func
);
127 size_t LenOfLastIteration
= kNumLabels
;
128 if (auto Tail
= InputLen
% kNumLabels
)
129 LenOfLastIteration
= Tail
;
130 for (size_t Iter
= 0; Iter
< NumIterations
; Iter
++)
131 PrintBinary(Out
, FuncLabelsPerIter
[Iter
][Func
],
132 Iter
== NumIterations
- 1 ? LenOfLastIteration
: kNumLabels
);
137 static void PrintCoverage(FILE *Out
) {
138 ssize_t CurrentFuncGuard
= -1;
139 ssize_t CurrentFuncNum
= -1;
140 ssize_t NumBlocksInCurrentFunc
= -1;
141 for (size_t FuncBeg
= 0; FuncBeg
< __dft
.NumGuards
;) {
143 assert(BlockIsEntry(FuncBeg
));
144 size_t FuncEnd
= FuncBeg
+ 1;
145 for (; FuncEnd
< __dft
.NumGuards
&& !BlockIsEntry(FuncEnd
); FuncEnd
++)
147 if (__dft
.BBExecuted
[FuncBeg
]) {
148 fprintf(Out
, "C%zd", CurrentFuncNum
);
149 for (size_t I
= FuncBeg
+ 1; I
< FuncEnd
; I
++)
150 if (__dft
.BBExecuted
[I
])
151 fprintf(Out
, " %zd", I
- FuncBeg
);
152 fprintf(Out
, " %zd\n", FuncEnd
- FuncBeg
);
158 int main(int argc
, char **argv
) {
159 if (LLVMFuzzerInitialize
)
160 LLVMFuzzerInitialize(&argc
, &argv
);
162 return PrintFunctions();
163 assert(argc
== 2 || argc
== 3);
165 const char *Input
= argv
[1];
166 fprintf(stderr
, "INFO: reading '%s'\n", Input
);
167 FILE *In
= fopen(Input
, "r");
169 fseek(In
, 0, SEEK_END
);
170 InputLen
= ftell(In
);
171 fseek(In
, 0, SEEK_SET
);
172 unsigned char *Buf
= (unsigned char*)malloc(InputLen
);
173 size_t NumBytesRead
= fread(Buf
, 1, InputLen
, In
);
174 assert(NumBytesRead
== InputLen
);
177 NumIterations
= (NumBytesRead
+ kNumLabels
- 1) / kNumLabels
;
179 (dfsan_label
**)calloc(NumIterations
, sizeof(dfsan_label
*));
180 for (size_t Iter
= 0; Iter
< NumIterations
; Iter
++)
181 FuncLabelsPerIter
[Iter
] =
182 (dfsan_label
*)calloc(__dft
.NumFuncs
, sizeof(dfsan_label
));
184 for (size_t Iter
= 0; Iter
< NumIterations
; Iter
++) {
185 fprintf(stderr
, "INFO: running '%s' %zd/%zd\n", Input
, Iter
, NumIterations
);
187 dfsan_set_label(0, Buf
, InputLen
);
188 __dft
.FuncLabels
= FuncLabelsPerIter
[Iter
];
190 size_t BaseIdx
= Iter
* kNumLabels
;
191 size_t LastIdx
= BaseIdx
+ kNumLabels
< NumBytesRead
? BaseIdx
+ kNumLabels
193 assert(BaseIdx
< LastIdx
);
194 for (size_t Idx
= BaseIdx
; Idx
< LastIdx
; Idx
++)
195 dfsan_set_label(1 << (Idx
- BaseIdx
), Buf
+ Idx
, 1);
196 LLVMFuzzerTestOneInput(Buf
, InputLen
);
200 bool OutIsStdout
= argc
== 2;
201 fprintf(stderr
, "INFO: writing dataflow to %s\n",
202 OutIsStdout
? "<stdout>" : argv
[2]);
203 FILE *Out
= OutIsStdout
? stdout
: fopen(argv
[2], "w");
206 if (!OutIsStdout
) fclose(Out
);