1 //===-- dfsan.cpp ---------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file is a part of DataFlowSanitizer.
11 // DataFlowSanitizer runtime. This file defines the public interface to
12 // DataFlowSanitizer as well as the definition of certain runtime functions
13 // called automatically by the compiler (specifically the instrumentation pass
14 // in llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp).
16 // The public interface is defined in include/sanitizer/dfsan_interface.h whose
17 // functions are prefixed dfsan_ while the compiler interface functions are
19 //===----------------------------------------------------------------------===//
21 #include "dfsan/dfsan.h"
23 #include "dfsan/dfsan_chained_origin_depot.h"
24 #include "dfsan/dfsan_flags.h"
25 #include "dfsan/dfsan_origin.h"
26 #include "dfsan/dfsan_thread.h"
27 #include "sanitizer_common/sanitizer_atomic.h"
28 #include "sanitizer_common/sanitizer_common.h"
29 #include "sanitizer_common/sanitizer_file.h"
30 #include "sanitizer_common/sanitizer_flag_parser.h"
31 #include "sanitizer_common/sanitizer_flags.h"
32 #include "sanitizer_common/sanitizer_internal_defs.h"
33 #include "sanitizer_common/sanitizer_libc.h"
34 #include "sanitizer_common/sanitizer_report_decorator.h"
35 #include "sanitizer_common/sanitizer_stacktrace.h"
37 # include <sys/personality.h>
40 using namespace __dfsan
;
42 Flags
__dfsan::flags_data
;
// Byte sizes of the thread-local buffers declared below. The instrumentation
// pass (DataFlowSanitizer.cpp) uses the same values, so any change made here
// must be mirrored there.
static constexpr int kDFsanArgTlsSize = 800;
static constexpr int kDFsanRetvalTlsSize = 800;
static constexpr int kDFsanArgOriginTlsSize = 800;
50 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u64
51 __dfsan_retval_tls
[kDFsanRetvalTlsSize
/ sizeof(u64
)];
52 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u32 __dfsan_retval_origin_tls
;
53 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u64
54 __dfsan_arg_tls
[kDFsanArgTlsSize
/ sizeof(u64
)];
55 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u32
56 __dfsan_arg_origin_tls
[kDFsanArgOriginTlsSize
/ sizeof(u32
)];
58 // Instrumented code may set this value in terms of -dfsan-track-origins.
59 // * undefined or 0: do not track origins.
60 // * 1: track origins at memory store operations.
61 // * 2: track origins at memory load and store operations.
62 // TODO: track callsites.
63 extern "C" SANITIZER_WEAK_ATTRIBUTE
const int __dfsan_track_origins
;
65 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
int dfsan_get_track_origins() {
66 return &__dfsan_track_origins
? __dfsan_track_origins
: 0;
69 // On Linux/x86_64, memory is laid out as follows:
71 // +--------------------+ 0x800000000000 (top of memory)
73 // +--------------------+ 0x700000000000
75 // +--------------------+ 0x610000000000
77 // +--------------------+ 0x600000000000
79 // +--------------------+ 0x510000000000
81 // +--------------------+ 0x500000000000
83 // +--------------------+ 0x400000000000
85 // +--------------------+ 0x300000000000
87 // +--------------------+ 0x200000000000
89 // +--------------------+ 0x110000000000
91 // +--------------------+ 0x100000000000
93 // +--------------------+ 0x010000000000
95 // +--------------------+ 0x000000000000
97 // MEM_TO_SHADOW(mem) = mem ^ 0x500000000000
98 // SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000
100 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
101 dfsan_label
__dfsan_union_load(const dfsan_label
*ls
, uptr n
) {
102 dfsan_label label
= ls
[0];
103 for (uptr i
= 1; i
!= n
; ++i
)
// Return the union of all the n labels from addr in the high 32 bits, and the
// origin of the first tainted byte in the low 32 bits.
110 extern "C" SANITIZER_INTERFACE_ATTRIBUTE u64
111 __dfsan_load_label_and_origin(const void *addr
, uptr n
) {
112 dfsan_label label
= 0;
115 dfsan_label
*s
= shadow_for((void *)p
);
116 for (uptr i
= 0; i
< n
; ++i
) {
117 dfsan_label l
= s
[i
];
122 ret
= *(dfsan_origin
*)origin_for((void *)(p
+ i
));
124 return ret
| (u64
)label
<< 32;
127 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
128 void __dfsan_unimplemented(char *fname
) {
129 if (flags().warn_unimplemented
)
130 Report("WARNING: DataFlowSanitizer: call to uninstrumented function %s\n",
134 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void __dfsan_wrapper_extern_weak_null(
135 const void *addr
, char *fname
) {
138 "ERROR: DataFlowSanitizer: dfsan generated wrapper calling null "
139 "extern_weak function %s\nIf this only happens with dfsan, the "
140 "dfsan instrumentation pass may be accidentally optimizing out a "
145 // Use '-mllvm -dfsan-debug-nonzero-labels' and break on this function
146 // to try to figure out where labels are being introduced in a nominally
147 // label-free program.
148 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void __dfsan_nonzero_label() {
149 if (flags().warn_nonzero_labels
)
150 Report("WARNING: DataFlowSanitizer: saw nonzero label\n");
153 // Indirect call to an uninstrumented vararg function. We don't have a way of
154 // handling these at the moment.
155 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void
156 __dfsan_vararg_wrapper(const char *fname
) {
157 Report("FATAL: DataFlowSanitizer: unsupported indirect call to vararg "
158 "function %s\n", fname
);
162 // Resolves the union of two labels.
163 SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
164 dfsan_union(dfsan_label l1
, dfsan_label l2
) {
168 static const uptr kOriginAlign
= sizeof(dfsan_origin
);
169 static const uptr kOriginAlignMask
= ~(kOriginAlign
- 1UL);
171 static uptr
OriginAlignUp(uptr u
) {
172 return (u
+ kOriginAlign
- 1) & kOriginAlignMask
;
175 static uptr
OriginAlignDown(uptr u
) { return u
& kOriginAlignMask
; }
177 // Return the origin of the first taint byte in the size bytes from the address
179 static dfsan_origin
GetOriginIfTainted(uptr addr
, uptr size
) {
180 for (uptr i
= 0; i
< size
; ++i
, ++addr
) {
181 dfsan_label
*s
= shadow_for((void *)addr
);
184 // Validate address region.
185 CHECK(MEM_IS_SHADOW(s
));
186 return *(dfsan_origin
*)origin_for((void *)addr
);
192 // For platforms which support slow unwinder only, we need to restrict the store
193 // context size to 1, basically only storing the current pc, because the slow
194 // unwinder which is based on libunwind is not async signal safe and causes
195 // random freezes in forking applications as well as in signal handlers.
196 // DFSan supports only Linux. So we do not restrict the store context size.
// Declares a local BufferedStackTrace named `stack` in the expanding scope and
// unwinds into it starting from the supplied pc/bp. The last argument passed
// to Unwind is the store_context_size runtime flag (presumably the maximum
// number of frames to record -- confirm against BufferedStackTrace::Unwind).
// Because the macro introduces a variable called `stack`, it can be expanded
// at most once per scope.
#define GET_STORE_STACK_TRACE_PC_BP(pc, bp) \
  UNINITIALIZED BufferedStackTrace stack; \
  stack.Unwind(pc, bp, nullptr, true, flags().store_context_size);
201 #define PRINT_CALLER_STACK_TRACE \
204 GET_STORE_STACK_TRACE_PC_BP(pc, bp) \
208 // Return a chain with the previous ID id and the current stack.
209 // from_init = true if this is the first chain of an origin tracking path.
210 static u32
ChainOrigin(u32 id
, StackTrace
*stack
, bool from_init
= false) {
211 // StackDepot is not async signal safe. Do not create new chains in a signal
213 DFsanThread
*t
= GetCurrentThread();
214 if (t
&& t
->InSignalHandler())
217 // As an optimization the origin of an application byte is updated only when
218 // its shadow is non-zero. Because we are only interested in the origins of
219 // taint labels, it does not matter what origin a zero label has. This reduces
220 // memory write cost. MSan does similar optimization. The following invariant
221 // may not hold because of some bugs. We check the invariant to help debug.
222 if (!from_init
&& id
== 0 && flags().check_origin_invariant
) {
223 Printf(" DFSan found invalid origin invariant\n");
224 PRINT_CALLER_STACK_TRACE
227 Origin o
= Origin::FromRawId(id
);
228 stack
->tag
= StackTrace::TAG_UNKNOWN
;
229 Origin chained
= Origin::CreateChainedOrigin(o
, stack
);
230 return chained
.raw_id();
233 static void ChainAndWriteOriginIfTainted(uptr src
, uptr size
, uptr dst
,
235 dfsan_origin o
= GetOriginIfTainted(src
, size
);
237 o
= ChainOrigin(o
, stack
);
238 *(dfsan_origin
*)origin_for((void *)dst
) = o
;
242 // Copy the origins of the size bytes from src to dst. The source and target
243 // memory ranges cannot be overlapped. This is used by memcpy. stack records the
244 // stack trace of the memcpy. When dst and src are not 4-byte aligned properly,
245 // origins at the unaligned address boundaries may be overwritten because four
246 // contiguous bytes share the same origin.
247 static void CopyOrigin(const void *dst
, const void *src
, uptr size
,
250 uptr beg
= OriginAlignDown(d
);
251 // Copy left unaligned origin if that memory is tainted.
253 ChainAndWriteOriginIfTainted((uptr
)src
, beg
+ kOriginAlign
- d
, beg
, stack
);
257 uptr end
= OriginAlignDown(d
+ size
);
258 // If both ends fall into the same 4-byte slot, we are done.
262 // Copy right unaligned origin if that memory is tainted.
264 ChainAndWriteOriginIfTainted((uptr
)src
+ (end
- d
), (d
+ size
) - end
, end
,
271 uptr src_a
= OriginAlignUp((uptr
)src
);
272 dfsan_origin
*src_o
= origin_for((void *)src_a
);
273 u32
*src_s
= (u32
*)shadow_for((void *)src_a
);
274 dfsan_origin
*src_end
= origin_for((void *)(src_a
+ (end
- beg
)));
275 dfsan_origin
*dst_o
= origin_for((void *)beg
);
276 dfsan_origin last_src_o
= 0;
277 dfsan_origin last_dst_o
= 0;
278 for (; src_o
< src_end
; ++src_o
, ++src_s
, ++dst_o
) {
281 if (*src_o
!= last_src_o
) {
283 last_dst_o
= ChainOrigin(last_src_o
, stack
);
289 // Copy the origins of the size bytes from src to dst. The source and target
290 // memory ranges may be overlapped. So the copy is done in a reverse order.
291 // This is used by memmove. stack records the stack trace of the memmove.
292 static void ReverseCopyOrigin(const void *dst
, const void *src
, uptr size
,
295 uptr end
= OriginAlignDown(d
+ size
);
297 // Copy right unaligned origin if that memory is tainted.
299 ChainAndWriteOriginIfTainted((uptr
)src
+ (end
- d
), (d
+ size
) - end
, end
,
302 uptr beg
= OriginAlignDown(d
);
304 if (beg
+ kOriginAlign
< end
) {
306 uptr src_a
= OriginAlignUp((uptr
)src
);
307 void *src_end
= (void *)(src_a
+ end
- beg
- kOriginAlign
);
308 dfsan_origin
*src_end_o
= origin_for(src_end
);
309 u32
*src_end_s
= (u32
*)shadow_for(src_end
);
310 dfsan_origin
*src_begin_o
= origin_for((void *)src_a
);
311 dfsan_origin
*dst
= origin_for((void *)(end
- kOriginAlign
));
312 dfsan_origin last_src_o
= 0;
313 dfsan_origin last_dst_o
= 0;
314 for (; src_end_o
>= src_begin_o
; --src_end_o
, --src_end_s
, --dst
) {
317 if (*src_end_o
!= last_src_o
) {
318 last_src_o
= *src_end_o
;
319 last_dst_o
= ChainOrigin(last_src_o
, stack
);
325 // Copy left unaligned origin if that memory is tainted.
327 ChainAndWriteOriginIfTainted((uptr
)src
, beg
+ kOriginAlign
- d
, beg
, stack
);
330 // Copy or move the origins of the len bytes from src to dst. The source and
331 // target memory ranges may or may not be overlapped. This is used by memory
332 // transfer operations. stack records the stack trace of the memory transfer
334 static void MoveOrigin(const void *dst
, const void *src
, uptr size
,
336 // Validate address regions.
337 if (!MEM_IS_SHADOW(shadow_for(dst
)) ||
338 !MEM_IS_SHADOW(shadow_for((void *)((uptr
)dst
+ size
))) ||
339 !MEM_IS_SHADOW(shadow_for(src
)) ||
340 !MEM_IS_SHADOW(shadow_for((void *)((uptr
)src
+ size
)))) {
344 // If destination origin range overlaps with source origin range, move
345 // origins by copying origins in a reverse order; otherwise, copy origins in
346 // a normal order. The orders of origin transfer are consistent with the
347 // orders of how memcpy and memmove transfer user data.
348 uptr src_aligned_beg
= OriginAlignDown((uptr
)src
);
349 uptr src_aligned_end
= OriginAlignDown((uptr
)src
+ size
);
350 uptr dst_aligned_beg
= OriginAlignDown((uptr
)dst
);
351 if (dst_aligned_beg
< src_aligned_end
&& dst_aligned_beg
>= src_aligned_beg
)
352 return ReverseCopyOrigin(dst
, src
, size
, stack
);
353 return CopyOrigin(dst
, src
, size
, stack
);
// Set the size bytes from the address dst to be the origin value.
357 static void SetOrigin(const void *dst
, uptr size
, u32 origin
) {
361 // Origin mapping is 4 bytes per 4 bytes of application memory.
362 // Here we extend the range such that its left and right bounds are both
364 uptr x
= unaligned_origin_for((uptr
)dst
);
365 uptr beg
= OriginAlignDown(x
);
366 uptr end
= OriginAlignUp(x
+ size
); // align up.
367 u64 origin64
= ((u64
)origin
<< 32) | origin
;
368 // This is like memset, but the value is 32-bit. We unroll by 2 to write
369 // 64 bits at once. May want to unroll further to get 128-bit stores.
371 if (*(u32
*)beg
!= origin
)
372 *(u32
*)beg
= origin
;
375 for (uptr addr
= beg
; addr
< (end
& ~7UL); addr
+= 8) {
376 if (*(u64
*)addr
== origin64
)
378 *(u64
*)addr
= origin64
;
381 if (*(u32
*)(end
- kOriginAlign
) != origin
)
382 *(u32
*)(end
- kOriginAlign
) = origin
;
385 #define RET_CHAIN_ORIGIN(id) \
387 GET_STORE_STACK_TRACE_PC_BP(pc, bp); \
388 return ChainOrigin(id, &stack);
390 // Return a new origin chain with the previous ID id and the current stack
392 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin
393 __dfsan_chain_origin(dfsan_origin id
) {
397 // Return a new origin chain with the previous ID id and the current stack
398 // trace if the label is tainted.
399 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin
400 __dfsan_chain_origin_if_tainted(dfsan_label label
, dfsan_origin id
) {
406 // Copy or move the origins of the len bytes from src to dst.
407 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void __dfsan_mem_origin_transfer(
408 const void *dst
, const void *src
, uptr len
) {
412 GET_STORE_STACK_TRACE_PC_BP(pc
, bp
);
413 MoveOrigin(dst
, src
, len
, &stack
);
416 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void dfsan_mem_origin_transfer(
417 const void *dst
, const void *src
, uptr len
) {
418 __dfsan_mem_origin_transfer(dst
, src
, len
);
421 static void CopyShadow(void *dst
, const void *src
, uptr len
) {
422 internal_memcpy((void *)__dfsan::shadow_for(dst
),
423 (const void *)__dfsan::shadow_for(src
),
424 len
* sizeof(dfsan_label
));
427 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void dfsan_mem_shadow_transfer(
428 void *dst
, const void *src
, uptr len
) {
429 CopyShadow(dst
, src
, len
);
432 // Copy shadow and origins of the len bytes from src to dst.
433 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void
434 __dfsan_mem_shadow_origin_transfer(void *dst
, const void *src
, uptr size
) {
437 CopyShadow(dst
, src
, size
);
438 if (dfsan_get_track_origins()) {
    // Duplicating code instead of calling __dfsan_mem_origin_transfer
    // so that getting the caller stack frame works correctly.
442 GET_STORE_STACK_TRACE_PC_BP(pc
, bp
);
443 MoveOrigin(dst
, src
, size
, &stack
);
447 // Copy shadow and origins as per __atomic_compare_exchange.
448 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void
449 __dfsan_mem_shadow_origin_conditional_exchange(u8 condition
, void *target
,
451 const void *desired
, uptr size
) {
454 // condition is result of native call to __atomic_compare_exchange
456 // Copy desired into target
460 // Copy target into expected
466 CopyShadow(dst
, src
, size
);
467 if (dfsan_get_track_origins()) {
    // Duplicating code instead of calling __dfsan_mem_origin_transfer
    // so that getting the caller stack frame works correctly.
471 GET_STORE_STACK_TRACE_PC_BP(pc
, bp
);
472 MoveOrigin(dst
, src
, size
, &stack
);
476 bool __dfsan::dfsan_inited
;
477 bool __dfsan::dfsan_init_is_running
;
479 void __dfsan::dfsan_copy_memory(void *dst
, const void *src
, uptr size
) {
480 internal_memcpy(dst
, src
, size
);
481 dfsan_mem_shadow_transfer(dst
, src
, size
);
482 if (dfsan_get_track_origins())
483 dfsan_mem_origin_transfer(dst
, src
, size
);
486 // Releases the pages within the origin address range.
487 static void ReleaseOrigins(void *addr
, uptr size
) {
488 const uptr beg_origin_addr
= (uptr
)__dfsan::origin_for(addr
);
489 const void *end_addr
= (void *)((uptr
)addr
+ size
);
490 const uptr end_origin_addr
= (uptr
)__dfsan::origin_for(end_addr
);
492 if (end_origin_addr
- beg_origin_addr
<
493 common_flags()->clear_shadow_mmap_threshold
)
496 const uptr page_size
= GetPageSizeCached();
497 const uptr beg_aligned
= RoundUpTo(beg_origin_addr
, page_size
);
498 const uptr end_aligned
= RoundDownTo(end_origin_addr
, page_size
);
500 if (!MmapFixedSuperNoReserve(beg_aligned
, end_aligned
- beg_aligned
))
504 static void WriteZeroShadowInRange(uptr beg
, uptr end
) {
505 // Don't write the label if it is already the value we need it to be.
506 // In a program where most addresses are not labeled, it is common that
507 // a page of shadow memory is entirely zeroed. The Linux copy-on-write
508 // implementation will share all of the zeroed pages, making a copy of a
509 // page when any value is written. The un-sharing will happen even if
510 // the value written does not change the value in memory. Avoiding the
511 // write when both |label| and |*labelp| are zero dramatically reduces
512 // the amount of real memory used by large programs.
513 if (!mem_is_zero((const char *)beg
, end
- beg
))
514 internal_memset((void *)beg
, 0, end
- beg
);
517 // Releases the pages within the shadow address range, and sets
518 // the shadow addresses not on the pages to be 0.
519 static void ReleaseOrClearShadows(void *addr
, uptr size
) {
520 const uptr beg_shadow_addr
= (uptr
)__dfsan::shadow_for(addr
);
521 const void *end_addr
= (void *)((uptr
)addr
+ size
);
522 const uptr end_shadow_addr
= (uptr
)__dfsan::shadow_for(end_addr
);
524 if (end_shadow_addr
- beg_shadow_addr
<
525 common_flags()->clear_shadow_mmap_threshold
) {
526 WriteZeroShadowInRange(beg_shadow_addr
, end_shadow_addr
);
530 const uptr page_size
= GetPageSizeCached();
531 const uptr beg_aligned
= RoundUpTo(beg_shadow_addr
, page_size
);
532 const uptr end_aligned
= RoundDownTo(end_shadow_addr
, page_size
);
534 if (beg_aligned
>= end_aligned
) {
535 WriteZeroShadowInRange(beg_shadow_addr
, end_shadow_addr
);
537 if (beg_aligned
!= beg_shadow_addr
)
538 WriteZeroShadowInRange(beg_shadow_addr
, beg_aligned
);
539 if (end_aligned
!= end_shadow_addr
)
540 WriteZeroShadowInRange(end_aligned
, end_shadow_addr
);
541 if (!MmapFixedSuperNoReserve(beg_aligned
, end_aligned
- beg_aligned
))
546 static void SetShadow(dfsan_label label
, void *addr
, uptr size
,
547 dfsan_origin origin
) {
549 const uptr beg_shadow_addr
= (uptr
)__dfsan::shadow_for(addr
);
550 internal_memset((void *)beg_shadow_addr
, label
, size
);
551 if (dfsan_get_track_origins())
552 SetOrigin(addr
, size
, origin
);
556 if (dfsan_get_track_origins())
557 ReleaseOrigins(addr
, size
);
559 ReleaseOrClearShadows(addr
, size
);
562 // If the label s is tainted, set the size bytes from the address p to be a new
563 // origin chain with the previous ID o and the current stack trace. This is
564 // used by instrumentation to reduce code size when too much code is inserted.
565 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void __dfsan_maybe_store_origin(
566 dfsan_label s
, void *p
, uptr size
, dfsan_origin o
) {
569 GET_STORE_STACK_TRACE_PC_BP(pc
, bp
);
570 SetOrigin(p
, size
, ChainOrigin(o
, &stack
));
574 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void __dfsan_set_label(
575 dfsan_label label
, dfsan_origin origin
, void *addr
, uptr size
) {
576 SetShadow(label
, addr
, size
, origin
);
579 SANITIZER_INTERFACE_ATTRIBUTE
580 void dfsan_set_label(dfsan_label label
, void *addr
, uptr size
) {
581 dfsan_origin init_origin
= 0;
582 if (label
&& dfsan_get_track_origins()) {
584 GET_STORE_STACK_TRACE_PC_BP(pc
, bp
);
585 init_origin
= ChainOrigin(0, &stack
, true);
587 SetShadow(label
, addr
, size
, init_origin
);
590 SANITIZER_INTERFACE_ATTRIBUTE
591 void dfsan_add_label(dfsan_label label
, void *addr
, uptr size
) {
595 if (dfsan_get_track_origins()) {
597 GET_STORE_STACK_TRACE_PC_BP(pc
, bp
);
598 dfsan_origin init_origin
= ChainOrigin(0, &stack
, true);
599 SetOrigin(addr
, size
, init_origin
);
602 for (dfsan_label
*labelp
= shadow_for(addr
); size
!= 0; --size
, ++labelp
)
606 // Unlike the other dfsan interface functions the behavior of this function
607 // depends on the label of one of its arguments. Hence it is implemented as a
609 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
610 __dfsw_dfsan_get_label(long data
, dfsan_label data_label
,
611 dfsan_label
*ret_label
) {
616 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
__dfso_dfsan_get_label(
617 long data
, dfsan_label data_label
, dfsan_label
*ret_label
,
618 dfsan_origin data_origin
, dfsan_origin
*ret_origin
) {
624 // This function is used if dfsan_get_origin is called when origin tracking is
626 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin
__dfsw_dfsan_get_origin(
627 long data
, dfsan_label data_label
, dfsan_label
*ret_label
) {
632 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin
__dfso_dfsan_get_origin(
633 long data
, dfsan_label data_label
, dfsan_label
*ret_label
,
634 dfsan_origin data_origin
, dfsan_origin
*ret_origin
) {
640 SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
641 dfsan_read_label(const void *addr
, uptr size
) {
644 return __dfsan_union_load(shadow_for(addr
), size
);
647 SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin
648 dfsan_read_origin_of_first_taint(const void *addr
, uptr size
) {
649 return GetOriginIfTainted((uptr
)addr
, size
);
652 SANITIZER_INTERFACE_ATTRIBUTE
void dfsan_set_label_origin(dfsan_label label
,
656 __dfsan_set_label(label
, origin
, addr
, size
);
659 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
int
660 dfsan_has_label(dfsan_label label
, dfsan_label elem
) {
661 return (label
& elem
) == elem
;
665 typedef void (*dfsan_conditional_callback_t
)(dfsan_label label
,
666 dfsan_origin origin
);
668 } // namespace __dfsan
669 static dfsan_conditional_callback_t conditional_callback
= nullptr;
670 static dfsan_label labels_in_signal_conditional
= 0;
672 static void ConditionalCallback(dfsan_label label
, dfsan_origin origin
) {
673 // Programs have many branches. For efficiency the conditional sink callback
674 // handler needs to ignore as many as possible as early as possible.
678 if (conditional_callback
== nullptr) {
682 // This initial ConditionalCallback handler needs to be in here in dfsan
683 // runtime (rather than being an entirely user implemented hook) so that it
684 // has access to dfsan thread information.
685 DFsanThread
*t
= GetCurrentThread();
686 // A callback operation which does useful work (like record the flow) will
687 // likely be too long executed in a signal handler.
688 if (t
&& t
->InSignalHandler()) {
689 // Record set of labels used in signal handler for completeness.
690 labels_in_signal_conditional
|= label
;
694 conditional_callback(label
, origin
);
697 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void
698 __dfsan_conditional_callback_origin(dfsan_label label
, dfsan_origin origin
) {
699 ConditionalCallback(label
, origin
);
702 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void __dfsan_conditional_callback(
704 ConditionalCallback(label
, 0);
707 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void dfsan_set_conditional_callback(
708 __dfsan::dfsan_conditional_callback_t callback
) {
709 conditional_callback
= callback
;
712 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
713 dfsan_get_labels_in_signal_conditional() {
714 return labels_in_signal_conditional
;
718 typedef void (*dfsan_reaches_function_callback_t
)(dfsan_label label
,
722 const char *function
);
724 } // namespace __dfsan
725 static dfsan_reaches_function_callback_t reaches_function_callback
= nullptr;
726 static dfsan_label labels_in_signal_reaches_function
= 0;
728 static void ReachesFunctionCallback(dfsan_label label
, dfsan_origin origin
,
729 const char *file
, unsigned int line
,
730 const char *function
) {
734 if (reaches_function_callback
== nullptr) {
738 // This initial ReachesFunctionCallback handler needs to be in here in dfsan
739 // runtime (rather than being an entirely user implemented hook) so that it
740 // has access to dfsan thread information.
741 DFsanThread
*t
= GetCurrentThread();
742 // A callback operation which does useful work (like record the flow) will
743 // likely be too long executed in a signal handler.
744 if (t
&& t
->InSignalHandler()) {
745 // Record set of labels used in signal handler for completeness.
746 labels_in_signal_reaches_function
|= label
;
750 reaches_function_callback(label
, origin
, file
, line
, function
);
753 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void
754 __dfsan_reaches_function_callback_origin(dfsan_label label
, dfsan_origin origin
,
755 const char *file
, unsigned int line
,
756 const char *function
) {
757 ReachesFunctionCallback(label
, origin
, file
, line
, function
);
760 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void
761 __dfsan_reaches_function_callback(dfsan_label label
, const char *file
,
762 unsigned int line
, const char *function
) {
763 ReachesFunctionCallback(label
, 0, file
, line
, function
);
766 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void
767 dfsan_set_reaches_function_callback(
768 __dfsan::dfsan_reaches_function_callback_t callback
) {
769 reaches_function_callback
= callback
;
772 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
773 dfsan_get_labels_in_signal_reaches_function() {
774 return labels_in_signal_reaches_function
;
778 class Decorator
: public __sanitizer::SanitizerCommonDecorator
{
780 Decorator() : SanitizerCommonDecorator() {}
781 const char *Origin() const { return Magenta(); }
785 static void PrintNoOriginTrackingWarning() {
788 " %sDFSan: origin tracking is not enabled. Did you specify the "
789 "-dfsan-track-origins=1 option?%s\n",
790 d
.Warning(), d
.Default());
793 static void PrintNoTaintWarning(const void *address
) {
795 Printf(" %sDFSan: no tainted value at %x%s\n", d
.Warning(), address
,
799 static void PrintInvalidOriginWarning(dfsan_label label
, const void *address
) {
802 " %sTaint value 0x%x (at %p) has invalid origin tracking. This can "
803 "be a DFSan bug.%s\n",
804 d
.Warning(), label
, address
, d
.Default());
807 static void PrintInvalidOriginIdWarning(dfsan_origin origin
) {
810 " %sOrigin Id %d has invalid origin tracking. This can "
811 "be a DFSan bug.%s\n",
812 d
.Warning(), origin
, d
.Default());
815 static bool PrintOriginTraceFramesToStr(Origin o
, InternalScopedString
*out
) {
819 while (o
.isChainedOrigin()) {
821 dfsan_origin origin_id
= o
.raw_id();
822 o
= o
.getNextChainedOrigin(&stack
);
823 if (o
.isChainedOrigin())
825 " %sOrigin value: 0x%x, Taint value was stored to memory at%s\n",
826 d
.Origin(), origin_id
, d
.Default());
828 out
->AppendF(" %sOrigin value: 0x%x, Taint value was created at%s\n",
829 d
.Origin(), origin_id
, d
.Default());
831 // Includes a trailing newline, so no need to add it again.
839 static bool PrintOriginTraceToStr(const void *addr
, const char *description
,
840 InternalScopedString
*out
) {
842 CHECK(dfsan_get_track_origins());
845 const dfsan_label label
= *__dfsan::shadow_for(addr
);
848 const dfsan_origin origin
= *__dfsan::origin_for(addr
);
850 out
->AppendF(" %sTaint value 0x%x (at %p) origin tracking (%s)%s\n",
851 d
.Origin(), label
, addr
, description
? description
: "",
854 Origin o
= Origin::FromRawId(origin
);
855 return PrintOriginTraceFramesToStr(o
, out
);
858 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void dfsan_print_origin_trace(
859 const void *addr
, const char *description
) {
860 if (!dfsan_get_track_origins()) {
861 PrintNoOriginTrackingWarning();
865 const dfsan_label label
= *__dfsan::shadow_for(addr
);
867 PrintNoTaintWarning(addr
);
871 InternalScopedString trace
;
872 bool success
= PrintOriginTraceToStr(addr
, description
, &trace
);
875 Printf("%s", trace
.data());
878 PrintInvalidOriginWarning(label
, addr
);
881 extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr
882 dfsan_sprint_origin_trace(const void *addr
, const char *description
,
883 char *out_buf
, uptr out_buf_size
) {
886 if (!dfsan_get_track_origins()) {
887 PrintNoOriginTrackingWarning();
891 const dfsan_label label
= *__dfsan::shadow_for(addr
);
893 PrintNoTaintWarning(addr
);
897 InternalScopedString trace
;
898 bool success
= PrintOriginTraceToStr(addr
, description
, &trace
);
901 PrintInvalidOriginWarning(label
, addr
);
906 internal_strncpy(out_buf
, trace
.data(), out_buf_size
- 1);
907 out_buf
[out_buf_size
- 1] = '\0';
910 return trace
.length();
913 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void dfsan_print_origin_id_trace(
914 dfsan_origin origin
) {
915 if (!dfsan_get_track_origins()) {
916 PrintNoOriginTrackingWarning();
919 Origin o
= Origin::FromRawId(origin
);
921 InternalScopedString trace
;
922 bool success
= PrintOriginTraceFramesToStr(o
, &trace
);
925 Printf("%s", trace
.data());
928 PrintInvalidOriginIdWarning(origin
);
931 extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr
dfsan_sprint_origin_id_trace(
932 dfsan_origin origin
, char *out_buf
, uptr out_buf_size
) {
935 if (!dfsan_get_track_origins()) {
936 PrintNoOriginTrackingWarning();
939 Origin o
= Origin::FromRawId(origin
);
941 InternalScopedString trace
;
942 bool success
= PrintOriginTraceFramesToStr(o
, &trace
);
945 PrintInvalidOriginIdWarning(origin
);
950 internal_strncpy(out_buf
, trace
.data(), out_buf_size
- 1);
951 out_buf
[out_buf_size
- 1] = '\0';
954 return trace
.length();
957 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin
958 dfsan_get_init_origin(const void *addr
) {
959 if (!dfsan_get_track_origins())
962 const dfsan_label label
= *__dfsan::shadow_for(addr
);
966 const dfsan_origin origin
= *__dfsan::origin_for(addr
);
968 Origin o
= Origin::FromRawId(origin
);
969 dfsan_origin origin_id
= o
.raw_id();
970 while (o
.isChainedOrigin()) {
972 origin_id
= o
.raw_id();
973 o
= o
.getNextChainedOrigin(&stack
);
978 void __sanitizer::BufferedStackTrace::UnwindImpl(uptr pc
, uptr bp
,
982 using namespace __dfsan
;
983 DFsanThread
*t
= GetCurrentThread();
984 if (!t
|| !StackTrace::WillUseFastUnwind(request_fast
)) {
985 return Unwind(max_depth
, pc
, bp
, context
, 0, 0, false);
987 Unwind(max_depth
, pc
, bp
, nullptr, t
->stack_top(), t
->stack_bottom(), true);
// Public interface: captures a stack trace at the call site via
// GET_STORE_STACK_TRACE_PC_BP(pc, bp).
//
// NOTE(review): the statements that define `pc`/`bp` and that print the
// captured trace are not visible in this view; presumably the trace is printed
// right after capture -- confirm against the full source.
990 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void __sanitizer_print_stack_trace() {
992 GET_STORE_STACK_TRACE_PC_BP(pc
, bp
);
// Public interface: captures a stack trace at the call site and renders it
// into a caller-supplied buffer.
//
// out_buf:      destination buffer, forwarded to stack.PrintTo.
// out_buf_size: size of out_buf, forwarded to stack.PrintTo.
//
// Returns whatever stack.PrintTo(out_buf, out_buf_size) returns.
//
// NOTE(review): `stack` is presumably declared by the
// GET_STORE_STACK_TRACE_PC_BP macro, and `pc`/`bp` by a statement not visible
// in this view -- confirm.
996 extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr
997 dfsan_sprint_stack_trace(char *out_buf
, uptr out_buf_size
) {
1000 GET_STORE_STACK_TRACE_PC_BP(pc
, bp
);
1001 return stack
.PrintTo(out_buf
, out_buf_size
);
// Resets every DFSan flag member to its default value.
//
// DFSAN_FLAG is defined so that each entry of dfsan_flags.inc expands to
// `Name = DefaultValue;`; including the .inc file inside the body therefore
// emits one assignment per declared flag.
1004 void Flags::SetDefaults() {
1005 #define DFSAN_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
1006 #include "dfsan_flags.inc"
// Registers every DFSan flag with a flag parser.
//
// parser: parser that receives the registrations.
// f:      Flags object whose members back the registered flags.
//
// DFSAN_FLAG is defined so that each entry of dfsan_flags.inc expands to a
// RegisterFlag call using the stringified flag name (#Name), its description
// and the address of the matching member of *f.
1010 static void RegisterDfsanFlags(FlagParser
*parser
, Flags
*f
) {
1011 #define DFSAN_FLAG(Type, Name, DefaultValue, Description) \
1012 RegisterFlag(parser, #Name, Description, &f->Name);
1013 #include "dfsan_flags.inc"
// Sets up common sanitizer flags and DFSan-specific flags, then parses
// overrides from the DFSAN_OPTIONS environment variable.
//
// Sequence visible here:
//   1. Reset common flags to defaults, then override them with a copy in
//      which intercept_tls_get_addr is forced to true.
//   2. Reset DFSan flags to defaults.
//   3. Register common and DFSan flags with `parser` and parse DFSAN_OPTIONS.
//   4. Finish common-flag initialization; report unrecognized flags when
//      Verbosity() is non-zero and print flag descriptions when `help` is set.
//
// NOTE(review): the declarations of `cf` and `parser` are not visible in this
// view.
1017 static void InitializeFlags() {
1018 SetCommonFlagsDefaults();
// Work on a copy so the override is applied atomically via
// OverrideCommonFlags.
1021 cf
.CopyFrom(*common_flags());
1022 cf
.intercept_tls_get_addr
= true;
1023 OverrideCommonFlags(cf
);
1025 flags().SetDefaults();
1028 RegisterCommonFlags(&parser
);
1029 RegisterDfsanFlags(&parser
, &flags());
1030 parser
.ParseStringFromEnv("DFSAN_OPTIONS");
1031 InitializeCommonFlags();
1032 if (Verbosity()) ReportUnrecognizedFlags();
1033 if (common_flags()->help
) parser
.PrintFlagDescriptions();
// Zeroes part of the argument TLS area.
//
// offset: byte offset from the start of __dfsan_arg_tls (the base address is
//         converted to uptr before the addition, so no element scaling
//         applies).
// size:   number of bytes to zero.
//
// No bounds check against the size of __dfsan_arg_tls is performed here.
1036 SANITIZER_INTERFACE_ATTRIBUTE
1037 void dfsan_clear_arg_tls(uptr offset
, uptr size
) {
1038 internal_memset((void *)((uptr
)__dfsan_arg_tls
+ offset
), 0, size
);
// Zeroes the thread-local state used to pass labels (and, when enabled,
// origins) between instrumented functions.
//
// __dfsan_arg_tls and __dfsan_retval_tls are always cleared in full.
// __dfsan_arg_origin_tls and __dfsan_retval_origin_tls are cleared only when
// dfsan_get_track_origins() is non-zero.
1041 SANITIZER_INTERFACE_ATTRIBUTE
1042 void dfsan_clear_thread_local_state() {
1043 internal_memset(__dfsan_arg_tls
, 0, sizeof(__dfsan_arg_tls
));
1044 internal_memset(__dfsan_retval_tls
, 0, sizeof(__dfsan_retval_tls
));
// Origin TLS only matters when origin tracking is enabled.
1046 if (dfsan_get_track_origins()) {
1047 internal_memset(__dfsan_arg_origin_tls
, 0, sizeof(__dfsan_arg_origin_tls
));
// __dfsan_retval_origin_tls is passed by address, unlike the arrays above.
1048 internal_memset(&__dfsan_retval_origin_tls
, 0,
1049 sizeof(__dfsan_retval_origin_tls
));
// Stores a label into the argument TLS area.
//
// offset: slot index; the label is written at dfsan_label index offset * 2 of
//         __dfsan_arg_tls (see the alignment note below).
// label:  label value to store.
//
// No bounds check against the size of __dfsan_arg_tls is performed here.
1053 SANITIZER_INTERFACE_ATTRIBUTE
1054 void dfsan_set_arg_tls(uptr offset
, dfsan_label label
) {
1055 // 2x to match ShadowTLSAlignment.
1056 // ShadowTLSAlignment should probably be changed.
1057 // TODO: Consider reducing ShadowTLSAlignment to 1.
1058 // Aligning to 2 bytes is probably a remnant of fast16 mode.
1059 ((dfsan_label
*)__dfsan_arg_tls
)[offset
* 2] = label
;
// Stores an origin into the argument-origin TLS array.
//
// offset: element index into __dfsan_arg_origin_tls (no scaling, unlike
//         dfsan_set_arg_tls).
// o:      origin value to store.
//
// No bounds check against the size of __dfsan_arg_origin_tls is performed
// here.
1062 SANITIZER_INTERFACE_ATTRIBUTE
1063 void dfsan_set_arg_origin_tls(uptr offset
, dfsan_origin o
) {
1064 __dfsan_arg_origin_tls
[offset
] = o
;
// Clears all label/origin state by re-mapping the SHADOW and ORIGIN regions
// of kMemoryLayout with MmapFixedSuperNoReserve, then resets the two
// signal-callback counters to 0.
//
// For each layout entry the function tests whether the type is SHADOW or
// ORIGIN and whether the region starts below GetMaxUserVirtualAddress();
// a failed re-map is reported with a FATAL message.
//
// NOTE(review): the statements guarded by the two `if`s (presumably
// `continue`) and what follows the FATAL message (presumably process
// termination) are not visible in this view -- confirm.
1067 extern "C" void dfsan_flush() {
1068 const uptr maxVirtualAddress
= GetMaxUserVirtualAddress();
1069 for (unsigned i
= 0; i
< kMemoryLayoutSize
; ++i
) {
1070 uptr start
= kMemoryLayout
[i
].start
;
1071 uptr end
= kMemoryLayout
[i
].end
;
1072 uptr size
= end
- start
;
1073 MappingDesc::Type type
= kMemoryLayout
[i
].type
;
// Only shadow and origin regions hold state that needs flushing.
1075 if (type
!= MappingDesc::SHADOW
&& type
!= MappingDesc::ORIGIN
)
1078 // Check if the segment should be mapped based on platform constraints.
1079 if (start
>= maxVirtualAddress
)
// Mapping fresh pages over the region discards the previous contents.
1082 if (!MmapFixedSuperNoReserve(start
, size
, kMemoryLayout
[i
].name
)) {
1083 Printf("FATAL: DataFlowSanitizer: failed to clear memory region\n");
1087 labels_in_signal_conditional
= 0;
1088 labels_in_signal_reaches_function
= 0;
// Self-check of the static kMemoryLayout table; every violation trips a
// CHECK.
//
// For each entry it verifies that:
//   - start < end, and start equals the previous entry's end (the table is
//     contiguous and ordered);
//   - the first, middle and last addresses of the range classify as the
//     entry's type via addr_is_type;
//   - for APP ranges, sampled addresses map into shadow (MEM_TO_SHADOW) and
//     origin (MEM_TO_ORIGIN) memory, and MEM_TO_ORIGIN agrees with
//     SHADOW_TO_ORIGIN(MEM_TO_SHADOW(addr)).
//
// NOTE(review): the initialisation/update of `prev_end` and the assignments
// of `addr` for the first and last sample are not visible in this view; the
// three CHECK groups presumably sample start, the midpoint and end - 1 --
// confirm.
1091 // TODO: CheckMemoryLayoutSanity is based on msan.
1092 // Consider refactoring these into a shared implementation.
1093 static void CheckMemoryLayoutSanity() {
1095 for (unsigned i
= 0; i
< kMemoryLayoutSize
; ++i
) {
1096 uptr start
= kMemoryLayout
[i
].start
;
1097 uptr end
= kMemoryLayout
[i
].end
;
1098 MappingDesc::Type type
= kMemoryLayout
[i
].type
;
1099 CHECK_LT(start
, end
);
1100 CHECK_EQ(prev_end
, start
);
1101 CHECK(addr_is_type(start
, type
));
1102 CHECK(addr_is_type((start
+ end
) / 2, type
));
1103 CHECK(addr_is_type(end
- 1, type
));
// Application ranges must additionally round-trip through the shadow and
// origin address mappings.
1104 if (type
== MappingDesc::APP
) {
1106 CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr
)));
1107 CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr
)));
1108 CHECK_EQ(MEM_TO_ORIGIN(addr
), SHADOW_TO_ORIGIN(MEM_TO_SHADOW(addr
)));
1110 addr
= (start
+ end
) / 2;
1111 CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr
)));
1112 CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr
)));
1113 CHECK_EQ(MEM_TO_ORIGIN(addr
), SHADOW_TO_ORIGIN(MEM_TO_SHADOW(addr
)));
1116 CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr
)));
1117 CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr
)));
1118 CHECK_EQ(MEM_TO_ORIGIN(addr
), SHADOW_TO_ORIGIN(MEM_TO_SHADOW(addr
)));
// Checks whether the address range [beg, beg + size - 1] is available
// according to MemoryRangeIsAvailable.
//
// beg:     first address of the range.
// size:    length of the range in bytes; the inclusive end is computed as
//          beg + size - 1.
// verbose: presumably gates the FATAL message below (the guarding statement
//          is not visible in this view) -- confirm.
//
// NOTE(review): the return statements and any guard for size == 0 are not
// visible in this view; presumably returns false when the range is
// unavailable and true otherwise -- confirm.
1124 // TODO: CheckMemoryRangeAvailability is based on msan.
1125 // Consider refactoring these into a shared implementation.
1126 static bool CheckMemoryRangeAvailability(uptr beg
, uptr size
, bool verbose
) {
// Inclusive end address, as expected by MemoryRangeIsAvailable below.
1128 uptr end
= beg
+ size
- 1;
1129 if (!MemoryRangeIsAvailable(beg
, end
)) {
1131 Printf("FATAL: Memory range %p - %p is not available.\n", beg
, end
);
// Makes an address range inaccessible by mapping it with MmapFixedNoAccess.
//
// beg:  first address of the range.
// size: length of the range in bytes.
// name: label passed to the mapping call and used in the error message.
//
// When the range starts at address 0 and the first mapping attempt returned a
// non-null address, the mapping is retried; a gap of 16 pages is computed for
// that retry. The call is treated as failed when the returned address differs
// from `beg`, in which case a FATAL message naming the inclusive range is
// printed.
//
// NOTE(review): the statements that apply `gap` to beg/size before the retry,
// the remainder of the Printf argument list, and the return statements are
// not visible in this view -- confirm against the full source.
1138 // TODO: ProtectMemoryRange is based on msan.
1139 // Consider refactoring these into a shared implementation.
1140 static bool ProtectMemoryRange(uptr beg
, uptr size
, const char *name
) {
1142 void *addr
= MmapFixedNoAccess(beg
, size
, name
);
1143 if (beg
== 0 && addr
) {
1144 // Depending on the kernel configuration, we may not be able to protect
1145 // the page at address zero.
1146 uptr gap
= 16 * GetPageSizeCached();
1149 addr
= MmapFixedNoAccess(beg
, size
, name
);
// Success is defined as the mapping landing exactly at `beg`.
1151 if ((uptr
)addr
!= beg
) {
1152 uptr end
= beg
+ size
- 1;
1153 Printf("FATAL: Cannot protect memory range %p - %p (%s).\n", beg
, end
,
// Validates the static memory layout and then maps or protects each region
// of kMemoryLayout.
//
// init_origins: when true, ORIGIN regions are mapped like SHADOW regions;
//               when false, ORIGIN regions are protected like INVALID ones.
// dry_run:      when true, regions are only probed with
//               CheckMemoryRangeAvailability (with verbose = !dry_run, i.e.
//               quiet) and nothing is mapped or protected; when false, the
//               regions are actually mapped (MmapFixedSuperNoReserve) or
//               protected (ProtectMemoryRange).
//
// Per region, exactly one of three cases applies (map and protect are
// CHECKed to be mutually exclusive):
//   - neither: the region must be APP or ALLOCATOR; in a dry run ALLOCATOR
//     regions are probed for availability;
//   - map:     probe (dry run) or map the region, and exclude it from core
//     dumps when use_madv_dontdump is set;
//   - protect: probe (dry run) or make the region inaccessible.
//
// NOTE(review): the branch headers for the map/protect cases, the statement
// guarded by the maxVirtualAddress test (presumably `continue`), the guard on
// the "out of application range" message, and every return statement are not
// visible in this view. Presumably each failed check returns false and the
// function returns true otherwise -- confirm against the full source.
1161 // TODO: InitShadow is based on msan.
1162 // Consider refactoring these into a shared implementation.
1163 static bool InitShadow(bool init_origins
, bool dry_run
) {
1164 // Let user know mapping parameters first.
1165 VPrintf(1, "dfsan_init %p\n", (void *)&__dfsan::dfsan_init
);
1166 for (unsigned i
= 0; i
< kMemoryLayoutSize
; ++i
)
1167 VPrintf(1, "%s: %zx - %zx\n", kMemoryLayout
[i
].name
, kMemoryLayout
[i
].start
,
1168 kMemoryLayout
[i
].end
- 1);
1170 CheckMemoryLayoutSanity();
// The runtime's own code must live inside an APP region of the layout.
1172 if (!MEM_IS_APP(&__dfsan::dfsan_init
)) {
1174 Printf("FATAL: Code %p is out of application range. Non-PIE build?\n",
1175 (uptr
)&__dfsan::dfsan_init
);
1179 const uptr maxVirtualAddress
= GetMaxUserVirtualAddress();
1181 for (unsigned i
= 0; i
< kMemoryLayoutSize
; ++i
) {
1182 uptr start
= kMemoryLayout
[i
].start
;
1183 uptr end
= kMemoryLayout
[i
].end
;
1184 uptr size
= end
- start
;
1185 MappingDesc::Type type
= kMemoryLayout
[i
].type
;
1187 // Check if the segment should be mapped based on platform constraints.
1188 if (start
>= maxVirtualAddress
)
// Classify the region: ORIGIN is mapped only when origins are tracked and
// protected otherwise.
1191 bool map
= type
== MappingDesc::SHADOW
||
1192 (init_origins
&& type
== MappingDesc::ORIGIN
);
1193 bool protect
= type
== MappingDesc::INVALID
||
1194 (!init_origins
&& type
== MappingDesc::ORIGIN
);
1195 CHECK(!(map
&& protect
));
1196 if (!map
&& !protect
) {
1197 CHECK(type
== MappingDesc::APP
|| type
== MappingDesc::ALLOCATOR
);
1199 if (dry_run
&& type
== MappingDesc::ALLOCATOR
&&
1200 !CheckMemoryRangeAvailability(start
, size
, !dry_run
))
1204 if (dry_run
&& !CheckMemoryRangeAvailability(start
, size
, !dry_run
))
1207 !MmapFixedSuperNoReserve(start
, size
, kMemoryLayout
[i
].name
))
1209 if (!dry_run
&& common_flags()->use_madv_dontdump
)
1210 DontDumpShadowMemory(start
, size
);
1213 if (dry_run
&& !CheckMemoryRangeAvailability(start
, size
, !dry_run
))
1215 if (!dry_run
&& !ProtectMemoryRange(start
, size
, kMemoryLayout
[i
].name
))
// Initializes shadow memory, falling back to disabling ASLR when the layout
// check fails.
//
// init_origins: forwarded to InitShadow.
//
// Flow visible here:
//   1. Run InitShadow in dry-run mode to test the layout quietly.
//   2. Query the current persona with personality(0xffffffff) and test that
//      the query succeeded and ADDR_NO_RANDOMIZE is not already set.
//   3. Warn about the incompatible layout and set ADDR_NO_RANDOMIZE on top of
//      the old persona (CHECKed to succeed).
//   4. Return `success && InitShadow(init_origins, false)`, i.e. perform the
//      real mapping only if the dry run is considered successful.
//
// NOTE(review): the conditions guarding steps 2-3, the variable that receives
// the ASLR test, the re-exec call itself and any retry of the dry run are not
// visible in this view -- confirm against the full source.
1223 static bool InitShadowWithReExec(bool init_origins
) {
1224 // Start with dry run: check layout is ok, but don't print warnings because
1225 // warning messages will cause tests to fail (even if we successfully re-exec
1226 // after the warning).
1227 bool success
= InitShadow(init_origins
, true);
1230 // Perhaps ASLR entropy is too high. If ASLR is enabled, re-exec without it.
1231 int old_personality
= personality(0xffffffff);
1233 (old_personality
!= -1) && ((old_personality
& ADDR_NO_RANDOMIZE
) == 0);
1237 "WARNING: DataflowSanitizer: memory layout is incompatible, "
1238 "possibly due to high-entropy ASLR.\n"
1239 "Re-execing with fixed virtual address space.\n"
1240 "N.B. reducing ASLR entropy is preferable.\n");
1241 CHECK_NE(personality(old_personality
| ADDR_NO_RANDOMIZE
), -1);
1247 // The earlier dry run didn't actually map or protect anything. Run again in
1248 // non-dry run mode.
1249 return success
&& InitShadow(init_origins
, false);
// One-time initialization of the DFSan runtime.
//
// argc, argv, envp: present so the function matches the signature of a
//                   .preinit_array entry; none of the statements visible
//                   here read them.
//
// Order of the visible steps:
//   - CHECK that initialization is not already in progress, then mark it as
//     running and set the tool name;
//   - AvoidCVE_2016_2143() and InitializePlatformEarly();
//   - set up shadow memory via InitShadowWithReExec (a FATAL message is
//     printed on failure);
//   - initialize interceptors, thread-specific data (with DFsanTSDDtor as the
//     destructor) and the allocator;
//   - create the main DFsanThread, make it current and initialize it;
//   - clear the "running" flag and set dfsan_inited.
//
// NOTE(review): several statements are not visible in this view (e.g. any
// early return when already initialized, flag initialization, and what
// follows the FATAL message) -- confirm against the full source.
1252 static void DFsanInit(int argc
, char **argv
, char **envp
) {
1253 CHECK(!dfsan_init_is_running
);
1256 dfsan_init_is_running
= true;
1257 SanitizerToolName
= "DataflowSanitizer";
1259 AvoidCVE_2016_2143();
1265 InitializePlatformEarly();
// Origin regions are mapped only when origin tracking is enabled.
1267 if (!InitShadowWithReExec(dfsan_get_track_origins())) {
1268 Printf("FATAL: DataflowSanitizer can not mmap the shadow memory.\n");
1273 initialize_interceptors();
1276 DFsanTSDInit(DFsanTSDDtor
);
1278 dfsan_allocator_init();
// Register the thread running initialization as the main DFSan thread.
1280 DFsanThread
*main_thread
= DFsanThread::Create(nullptr, nullptr);
1281 SetCurrentThread(main_thread
);
1282 main_thread
->Init();
1284 dfsan_init_is_running
= false;
1285 dfsan_inited
= true;
// Argument-less entry point in namespace __dfsan: runs DFsanInit with an
// empty argument vector (argc = 0, argv = envp = nullptr).
1288 void __dfsan::dfsan_init() { DFsanInit(0, nullptr, nullptr); }
// When SANITIZER_CAN_USE_PREINIT_ARRAY is set, place a pointer to DFsanInit
// in the .preinit_array section. The `used` attribute keeps the otherwise
// unreferenced static from being discarded.
// NOTE(review): entries of .preinit_array are presumably invoked by the
// dynamic loader with (argc, argv, envp) before other initializers -- confirm
// against the platform ABI documentation.
1290 #if SANITIZER_CAN_USE_PREINIT_ARRAY
1291 __attribute__((section(".preinit_array"),
1292 used
)) static void (*dfsan_init_ptr
)(int, char **,
1293 char **) = DFsanInit
;