1 //===-- dfsan.cpp ---------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file is a part of DataFlowSanitizer.
11 // DataFlowSanitizer runtime. This file defines the public interface to
12 // DataFlowSanitizer as well as the definition of certain runtime functions
13 // called automatically by the compiler (specifically the instrumentation pass
14 // in llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp).
16 // The public interface is defined in include/sanitizer/dfsan_interface.h whose
17 // functions are prefixed dfsan_ while the compiler interface functions are prefixed __dfsan_.
19 //===----------------------------------------------------------------------===//
21 #include "dfsan/dfsan.h"
23 #include "dfsan/dfsan_chained_origin_depot.h"
24 #include "dfsan/dfsan_flags.h"
25 #include "dfsan/dfsan_origin.h"
26 #include "dfsan/dfsan_thread.h"
27 #include "sanitizer_common/sanitizer_atomic.h"
28 #include "sanitizer_common/sanitizer_common.h"
29 #include "sanitizer_common/sanitizer_file.h"
30 #include "sanitizer_common/sanitizer_flag_parser.h"
31 #include "sanitizer_common/sanitizer_flags.h"
32 #include "sanitizer_common/sanitizer_internal_defs.h"
33 #include "sanitizer_common/sanitizer_libc.h"
34 #include "sanitizer_common/sanitizer_report_decorator.h"
35 #include "sanitizer_common/sanitizer_stacktrace.h"
37 using namespace __dfsan
;
39 Flags
__dfsan::flags_data
;
41 // The size of TLS variables. These constants must be kept in sync with the ones
42 // in DataFlowSanitizer.cpp.
43 static const int kDFsanArgTlsSize
= 800;
44 static const int kDFsanRetvalTlsSize
= 800;
45 static const int kDFsanArgOriginTlsSize
= 800;
47 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u64
48 __dfsan_retval_tls
[kDFsanRetvalTlsSize
/ sizeof(u64
)];
49 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u32 __dfsan_retval_origin_tls
;
50 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u64
51 __dfsan_arg_tls
[kDFsanArgTlsSize
/ sizeof(u64
)];
52 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u32
53 __dfsan_arg_origin_tls
[kDFsanArgOriginTlsSize
/ sizeof(u32
)];
55 // Instrumented code may set this value in terms of -dfsan-track-origins.
56 // * undefined or 0: do not track origins.
57 // * 1: track origins at memory store operations.
58 // * 2: track origins at memory load and store operations.
59 // TODO: track callsites.
60 extern "C" SANITIZER_WEAK_ATTRIBUTE
const int __dfsan_track_origins
;
62 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
int dfsan_get_track_origins() {
63 return &__dfsan_track_origins
? __dfsan_track_origins
: 0;
66 // On Linux/x86_64, memory is laid out as follows:
68 // +--------------------+ 0x800000000000 (top of memory)
70 // +--------------------+ 0x700000000000
72 // +--------------------+ 0x610000000000
74 // +--------------------+ 0x600000000000
76 // +--------------------+ 0x510000000000
78 // +--------------------+ 0x500000000000
80 // +--------------------+ 0x400000000000
82 // +--------------------+ 0x300000000000
84 // +--------------------+ 0x200000000000
86 // +--------------------+ 0x110000000000
88 // +--------------------+ 0x100000000000
90 // +--------------------+ 0x010000000000
92 // +--------------------+ 0x000000000000
94 // MEM_TO_SHADOW(mem) = mem ^ 0x500000000000
95 // SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000
97 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
98 dfsan_label
__dfsan_union_load(const dfsan_label
*ls
, uptr n
) {
99 dfsan_label label
= ls
[0];
100 for (uptr i
= 1; i
!= n
; ++i
)
105 // Return the union of all the n labels from addr in the high 32 bits, and the
106 // origin of the first taint byte in the low 32 bits.
107 extern "C" SANITIZER_INTERFACE_ATTRIBUTE u64
108 __dfsan_load_label_and_origin(const void *addr
, uptr n
) {
109 dfsan_label label
= 0;
112 dfsan_label
*s
= shadow_for((void *)p
);
113 for (uptr i
= 0; i
< n
; ++i
) {
114 dfsan_label l
= s
[i
];
119 ret
= *(dfsan_origin
*)origin_for((void *)(p
+ i
));
121 return ret
| (u64
)label
<< 32;
124 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
125 void __dfsan_unimplemented(char *fname
) {
126 if (flags().warn_unimplemented
)
127 Report("WARNING: DataFlowSanitizer: call to uninstrumented function %s\n",
131 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void __dfsan_wrapper_extern_weak_null(
132 const void *addr
, char *fname
) {
135 "ERROR: DataFlowSanitizer: dfsan generated wrapper calling null "
136 "extern_weak function %s\nIf this only happens with dfsan, the "
137 "dfsan instrumentation pass may be accidentally optimizing out a "
142 // Use '-mllvm -dfsan-debug-nonzero-labels' and break on this function
143 // to try to figure out where labels are being introduced in a nominally
144 // label-free program.
145 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void __dfsan_nonzero_label() {
146 if (flags().warn_nonzero_labels
)
147 Report("WARNING: DataFlowSanitizer: saw nonzero label\n");
150 // Indirect call to an uninstrumented vararg function. We don't have a way of
151 // handling these at the moment.
152 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void
153 __dfsan_vararg_wrapper(const char *fname
) {
154 Report("FATAL: DataFlowSanitizer: unsupported indirect call to vararg "
155 "function %s\n", fname
);
159 // Resolves the union of two labels.
160 SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
161 dfsan_union(dfsan_label l1
, dfsan_label l2
) {
165 static const uptr kOriginAlign
= sizeof(dfsan_origin
);
166 static const uptr kOriginAlignMask
= ~(kOriginAlign
- 1UL);
168 static uptr
OriginAlignUp(uptr u
) {
169 return (u
+ kOriginAlign
- 1) & kOriginAlignMask
;
172 static uptr
OriginAlignDown(uptr u
) { return u
& kOriginAlignMask
; }
174 // Return the origin of the first taint byte in the size bytes from the address addr.
176 static dfsan_origin
GetOriginIfTainted(uptr addr
, uptr size
) {
177 for (uptr i
= 0; i
< size
; ++i
, ++addr
) {
178 dfsan_label
*s
= shadow_for((void *)addr
);
181 // Validate address region.
182 CHECK(MEM_IS_SHADOW(s
));
183 return *(dfsan_origin
*)origin_for((void *)addr
);
189 // For platforms which support slow unwinder only, we need to restrict the store
190 // context size to 1, basically only storing the current pc, because the slow
191 // unwinder which is based on libunwind is not async signal safe and causes
192 // random freezes in forking applications as well as in signal handlers.
193 // DFSan supports only Linux. So we do not restrict the store context size.
194 #define GET_STORE_STACK_TRACE_PC_BP(pc, bp) \
195 BufferedStackTrace stack; \
196 stack.Unwind(pc, bp, nullptr, true, flags().store_context_size);
198 #define PRINT_CALLER_STACK_TRACE \
201 GET_STORE_STACK_TRACE_PC_BP(pc, bp) \
205 // Return a chain with the previous ID id and the current stack.
206 // from_init = true if this is the first chain of an origin tracking path.
207 static u32
ChainOrigin(u32 id
, StackTrace
*stack
, bool from_init
= false) {
208 // StackDepot is not async signal safe. Do not create new chains in a signal handler.
210 DFsanThread
*t
= GetCurrentThread();
211 if (t
&& t
->InSignalHandler())
214 // As an optimization the origin of an application byte is updated only when
215 // its shadow is non-zero. Because we are only interested in the origins of
216 // taint labels, it does not matter what origin a zero label has. This reduces
217 // memory write cost. MSan does similar optimization. The following invariant
218 // may not hold because of some bugs. We check the invariant to help debug.
219 if (!from_init
&& id
== 0 && flags().check_origin_invariant
) {
220 Printf(" DFSan found invalid origin invariant\n");
221 PRINT_CALLER_STACK_TRACE
224 Origin o
= Origin::FromRawId(id
);
225 stack
->tag
= StackTrace::TAG_UNKNOWN
;
226 Origin chained
= Origin::CreateChainedOrigin(o
, stack
);
227 return chained
.raw_id();
230 static void ChainAndWriteOriginIfTainted(uptr src
, uptr size
, uptr dst
,
232 dfsan_origin o
= GetOriginIfTainted(src
, size
);
234 o
= ChainOrigin(o
, stack
);
235 *(dfsan_origin
*)origin_for((void *)dst
) = o
;
239 // Copy the origins of the size bytes from src to dst. The source and target
240 // memory ranges cannot be overlapped. This is used by memcpy. stack records the
241 // stack trace of the memcpy. When dst and src are not 4-byte aligned properly,
242 // origins at the unaligned address boundaries may be overwritten because four
243 // contiguous bytes share the same origin.
244 static void CopyOrigin(const void *dst
, const void *src
, uptr size
,
247 uptr beg
= OriginAlignDown(d
);
248 // Copy left unaligned origin if that memory is tainted.
250 ChainAndWriteOriginIfTainted((uptr
)src
, beg
+ kOriginAlign
- d
, beg
, stack
);
254 uptr end
= OriginAlignDown(d
+ size
);
255 // If both ends fall into the same 4-byte slot, we are done.
259 // Copy right unaligned origin if that memory is tainted.
261 ChainAndWriteOriginIfTainted((uptr
)src
+ (end
- d
), (d
+ size
) - end
, end
,
268 uptr src_a
= OriginAlignUp((uptr
)src
);
269 dfsan_origin
*src_o
= origin_for((void *)src_a
);
270 u32
*src_s
= (u32
*)shadow_for((void *)src_a
);
271 dfsan_origin
*src_end
= origin_for((void *)(src_a
+ (end
- beg
)));
272 dfsan_origin
*dst_o
= origin_for((void *)beg
);
273 dfsan_origin last_src_o
= 0;
274 dfsan_origin last_dst_o
= 0;
275 for (; src_o
< src_end
; ++src_o
, ++src_s
, ++dst_o
) {
278 if (*src_o
!= last_src_o
) {
280 last_dst_o
= ChainOrigin(last_src_o
, stack
);
286 // Copy the origins of the size bytes from src to dst. The source and target
287 // memory ranges may be overlapped. So the copy is done in a reverse order.
288 // This is used by memmove. stack records the stack trace of the memmove.
289 static void ReverseCopyOrigin(const void *dst
, const void *src
, uptr size
,
292 uptr end
= OriginAlignDown(d
+ size
);
294 // Copy right unaligned origin if that memory is tainted.
296 ChainAndWriteOriginIfTainted((uptr
)src
+ (end
- d
), (d
+ size
) - end
, end
,
299 uptr beg
= OriginAlignDown(d
);
301 if (beg
+ kOriginAlign
< end
) {
303 uptr src_a
= OriginAlignUp((uptr
)src
);
304 void *src_end
= (void *)(src_a
+ end
- beg
- kOriginAlign
);
305 dfsan_origin
*src_end_o
= origin_for(src_end
);
306 u32
*src_end_s
= (u32
*)shadow_for(src_end
);
307 dfsan_origin
*src_begin_o
= origin_for((void *)src_a
);
308 dfsan_origin
*dst
= origin_for((void *)(end
- kOriginAlign
));
309 dfsan_origin last_src_o
= 0;
310 dfsan_origin last_dst_o
= 0;
311 for (; src_end_o
>= src_begin_o
; --src_end_o
, --src_end_s
, --dst
) {
314 if (*src_end_o
!= last_src_o
) {
315 last_src_o
= *src_end_o
;
316 last_dst_o
= ChainOrigin(last_src_o
, stack
);
322 // Copy left unaligned origin if that memory is tainted.
324 ChainAndWriteOriginIfTainted((uptr
)src
, beg
+ kOriginAlign
- d
, beg
, stack
);
327 // Copy or move the origins of the len bytes from src to dst. The source and
328 // target memory ranges may or may not be overlapped. This is used by memory
329 // transfer operations. stack records the stack trace of the memory transfer operation.
331 static void MoveOrigin(const void *dst
, const void *src
, uptr size
,
333 // Validate address regions.
334 if (!MEM_IS_SHADOW(shadow_for(dst
)) ||
335 !MEM_IS_SHADOW(shadow_for((void *)((uptr
)dst
+ size
))) ||
336 !MEM_IS_SHADOW(shadow_for(src
)) ||
337 !MEM_IS_SHADOW(shadow_for((void *)((uptr
)src
+ size
)))) {
341 // If destination origin range overlaps with source origin range, move
342 // origins by copying origins in a reverse order; otherwise, copy origins in
343 // a normal order. The orders of origin transfer are consistent with the
344 // orders of how memcpy and memmove transfer user data.
345 uptr src_aligned_beg
= OriginAlignDown((uptr
)src
);
346 uptr src_aligned_end
= OriginAlignDown((uptr
)src
+ size
);
347 uptr dst_aligned_beg
= OriginAlignDown((uptr
)dst
);
348 if (dst_aligned_beg
< src_aligned_end
&& dst_aligned_beg
>= src_aligned_beg
)
349 return ReverseCopyOrigin(dst
, src
, size
, stack
);
350 return CopyOrigin(dst
, src
, size
, stack
);
353 // Set the size bytes from the address dst to be the origin value.
354 static void SetOrigin(const void *dst
, uptr size
, u32 origin
) {
358 // Origin mapping is 4 bytes per 4 bytes of application memory.
359 // Here we extend the range such that its left and right bounds are both
361 uptr x
= unaligned_origin_for((uptr
)dst
);
362 uptr beg
= OriginAlignDown(x
);
363 uptr end
= OriginAlignUp(x
+ size
); // align up.
364 u64 origin64
= ((u64
)origin
<< 32) | origin
;
365 // This is like memset, but the value is 32-bit. We unroll by 2 to write
366 // 64 bits at once. May want to unroll further to get 128-bit stores.
368 if (*(u32
*)beg
!= origin
)
369 *(u32
*)beg
= origin
;
372 for (uptr addr
= beg
; addr
< (end
& ~7UL); addr
+= 8) {
373 if (*(u64
*)addr
== origin64
)
375 *(u64
*)addr
= origin64
;
378 if (*(u32
*)(end
- kOriginAlign
) != origin
)
379 *(u32
*)(end
- kOriginAlign
) = origin
;
382 #define RET_CHAIN_ORIGIN(id) \
384 GET_STORE_STACK_TRACE_PC_BP(pc, bp); \
385 return ChainOrigin(id, &stack);
387 // Return a new origin chain with the previous ID id and the current stack trace.
389 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin
390 __dfsan_chain_origin(dfsan_origin id
) {
394 // Return a new origin chain with the previous ID id and the current stack
395 // trace if the label is tainted.
396 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin
397 __dfsan_chain_origin_if_tainted(dfsan_label label
, dfsan_origin id
) {
403 // Copy or move the origins of the len bytes from src to dst.
404 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void __dfsan_mem_origin_transfer(
405 const void *dst
, const void *src
, uptr len
) {
409 GET_STORE_STACK_TRACE_PC_BP(pc
, bp
);
410 MoveOrigin(dst
, src
, len
, &stack
);
413 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void dfsan_mem_origin_transfer(
414 const void *dst
, const void *src
, uptr len
) {
415 __dfsan_mem_origin_transfer(dst
, src
, len
);
418 static void CopyShadow(void *dst
, const void *src
, uptr len
) {
419 internal_memcpy((void *)__dfsan::shadow_for(dst
),
420 (const void *)__dfsan::shadow_for(src
),
421 len
* sizeof(dfsan_label
));
424 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void dfsan_mem_shadow_transfer(
425 void *dst
, const void *src
, uptr len
) {
426 CopyShadow(dst
, src
, len
);
429 // Copy shadow and origins of the len bytes from src to dst.
430 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void
431 __dfsan_mem_shadow_origin_transfer(void *dst
, const void *src
, uptr size
) {
434 CopyShadow(dst
, src
, size
);
435 if (dfsan_get_track_origins()) {
436 // Duplicating code instead of calling __dfsan_mem_origin_transfer
437 // so that getting the caller stack frame works correctly.
439 GET_STORE_STACK_TRACE_PC_BP(pc
, bp
);
440 MoveOrigin(dst
, src
, size
, &stack
);
444 // Copy shadow and origins as per __atomic_compare_exchange.
445 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void
446 __dfsan_mem_shadow_origin_conditional_exchange(u8 condition
, void *target
,
448 const void *desired
, uptr size
) {
451 // condition is result of native call to __atomic_compare_exchange
453 // Copy desired into target
457 // Copy target into expected
463 CopyShadow(dst
, src
, size
);
464 if (dfsan_get_track_origins()) {
465 // Duplicating code instead of calling __dfsan_mem_origin_transfer
466 // so that getting the caller stack frame works correctly.
468 GET_STORE_STACK_TRACE_PC_BP(pc
, bp
);
469 MoveOrigin(dst
, src
, size
, &stack
);
475 bool dfsan_inited
= false;
476 bool dfsan_init_is_running
= false;
478 void dfsan_copy_memory(void *dst
, const void *src
, uptr size
) {
479 internal_memcpy(dst
, src
, size
);
480 dfsan_mem_shadow_transfer(dst
, src
, size
);
481 if (dfsan_get_track_origins())
482 dfsan_mem_origin_transfer(dst
, src
, size
);
485 // Releases the pages within the origin address range.
486 static void ReleaseOrigins(void *addr
, uptr size
) {
487 const uptr beg_origin_addr
= (uptr
)__dfsan::origin_for(addr
);
488 const void *end_addr
= (void *)((uptr
)addr
+ size
);
489 const uptr end_origin_addr
= (uptr
)__dfsan::origin_for(end_addr
);
491 if (end_origin_addr
- beg_origin_addr
<
492 common_flags()->clear_shadow_mmap_threshold
)
495 const uptr page_size
= GetPageSizeCached();
496 const uptr beg_aligned
= RoundUpTo(beg_origin_addr
, page_size
);
497 const uptr end_aligned
= RoundDownTo(end_origin_addr
, page_size
);
499 if (!MmapFixedSuperNoReserve(beg_aligned
, end_aligned
- beg_aligned
))
503 static void WriteZeroShadowInRange(uptr beg
, uptr end
) {
504 // Don't write the label if it is already the value we need it to be.
505 // In a program where most addresses are not labeled, it is common that
506 // a page of shadow memory is entirely zeroed. The Linux copy-on-write
507 // implementation will share all of the zeroed pages, making a copy of a
508 // page when any value is written. The un-sharing will happen even if
509 // the value written does not change the value in memory. Avoiding the
510 // write when both |label| and |*labelp| are zero dramatically reduces
511 // the amount of real memory used by large programs.
512 if (!mem_is_zero((const char *)beg
, end
- beg
))
513 internal_memset((void *)beg
, 0, end
- beg
);
516 // Releases the pages within the shadow address range, and sets
517 // the shadow addresses not on the pages to be 0.
518 static void ReleaseOrClearShadows(void *addr
, uptr size
) {
519 const uptr beg_shadow_addr
= (uptr
)__dfsan::shadow_for(addr
);
520 const void *end_addr
= (void *)((uptr
)addr
+ size
);
521 const uptr end_shadow_addr
= (uptr
)__dfsan::shadow_for(end_addr
);
523 if (end_shadow_addr
- beg_shadow_addr
<
524 common_flags()->clear_shadow_mmap_threshold
) {
525 WriteZeroShadowInRange(beg_shadow_addr
, end_shadow_addr
);
529 const uptr page_size
= GetPageSizeCached();
530 const uptr beg_aligned
= RoundUpTo(beg_shadow_addr
, page_size
);
531 const uptr end_aligned
= RoundDownTo(end_shadow_addr
, page_size
);
533 if (beg_aligned
>= end_aligned
) {
534 WriteZeroShadowInRange(beg_shadow_addr
, end_shadow_addr
);
536 if (beg_aligned
!= beg_shadow_addr
)
537 WriteZeroShadowInRange(beg_shadow_addr
, beg_aligned
);
538 if (end_aligned
!= end_shadow_addr
)
539 WriteZeroShadowInRange(end_aligned
, end_shadow_addr
);
540 if (!MmapFixedSuperNoReserve(beg_aligned
, end_aligned
- beg_aligned
))
545 void SetShadow(dfsan_label label
, void *addr
, uptr size
, dfsan_origin origin
) {
547 const uptr beg_shadow_addr
= (uptr
)__dfsan::shadow_for(addr
);
548 internal_memset((void *)beg_shadow_addr
, label
, size
);
549 if (dfsan_get_track_origins())
550 SetOrigin(addr
, size
, origin
);
554 if (dfsan_get_track_origins())
555 ReleaseOrigins(addr
, size
);
557 ReleaseOrClearShadows(addr
, size
);
560 } // namespace __dfsan
562 // If the label s is tainted, set the size bytes from the address p to be a new
563 // origin chain with the previous ID o and the current stack trace. This is
564 // used by instrumentation to reduce code size when too much code is inserted.
565 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void __dfsan_maybe_store_origin(
566 dfsan_label s
, void *p
, uptr size
, dfsan_origin o
) {
569 GET_STORE_STACK_TRACE_PC_BP(pc
, bp
);
570 SetOrigin(p
, size
, ChainOrigin(o
, &stack
));
574 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void __dfsan_set_label(
575 dfsan_label label
, dfsan_origin origin
, void *addr
, uptr size
) {
576 __dfsan::SetShadow(label
, addr
, size
, origin
);
579 SANITIZER_INTERFACE_ATTRIBUTE
580 void dfsan_set_label(dfsan_label label
, void *addr
, uptr size
) {
581 dfsan_origin init_origin
= 0;
582 if (label
&& dfsan_get_track_origins()) {
584 GET_STORE_STACK_TRACE_PC_BP(pc
, bp
);
585 init_origin
= ChainOrigin(0, &stack
, true);
587 __dfsan::SetShadow(label
, addr
, size
, init_origin
);
590 SANITIZER_INTERFACE_ATTRIBUTE
591 void dfsan_add_label(dfsan_label label
, void *addr
, uptr size
) {
595 if (dfsan_get_track_origins()) {
597 GET_STORE_STACK_TRACE_PC_BP(pc
, bp
);
598 dfsan_origin init_origin
= ChainOrigin(0, &stack
, true);
599 SetOrigin(addr
, size
, init_origin
);
602 for (dfsan_label
*labelp
= shadow_for(addr
); size
!= 0; --size
, ++labelp
)
606 // Unlike the other dfsan interface functions the behavior of this function
607 // depends on the label of one of its arguments. Hence it is implemented as a
609 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
610 __dfsw_dfsan_get_label(long data
, dfsan_label data_label
,
611 dfsan_label
*ret_label
) {
616 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
__dfso_dfsan_get_label(
617 long data
, dfsan_label data_label
, dfsan_label
*ret_label
,
618 dfsan_origin data_origin
, dfsan_origin
*ret_origin
) {
624 // This function is used if dfsan_get_origin is called when origin tracking is
626 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin
__dfsw_dfsan_get_origin(
627 long data
, dfsan_label data_label
, dfsan_label
*ret_label
) {
632 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin
__dfso_dfsan_get_origin(
633 long data
, dfsan_label data_label
, dfsan_label
*ret_label
,
634 dfsan_origin data_origin
, dfsan_origin
*ret_origin
) {
640 SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
641 dfsan_read_label(const void *addr
, uptr size
) {
644 return __dfsan_union_load(shadow_for(addr
), size
);
647 SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin
648 dfsan_read_origin_of_first_taint(const void *addr
, uptr size
) {
649 return GetOriginIfTainted((uptr
)addr
, size
);
652 SANITIZER_INTERFACE_ATTRIBUTE
void dfsan_set_label_origin(dfsan_label label
,
656 __dfsan_set_label(label
, origin
, addr
, size
);
659 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
int
660 dfsan_has_label(dfsan_label label
, dfsan_label elem
) {
661 return (label
& elem
) == elem
;
666 typedef void (*dfsan_conditional_callback_t
)(dfsan_label label
,
667 dfsan_origin origin
);
668 static dfsan_conditional_callback_t conditional_callback
= nullptr;
669 static dfsan_label labels_in_signal_conditional
= 0;
671 static void ConditionalCallback(dfsan_label label
, dfsan_origin origin
) {
672 // Programs have many branches. For efficiency the conditional sink callback
673 // handler needs to ignore as many as possible as early as possible.
677 if (conditional_callback
== nullptr) {
681 // This initial ConditionalCallback handler needs to be in here in dfsan
682 // runtime (rather than being an entirely user implemented hook) so that it
683 // has access to dfsan thread information.
684 DFsanThread
*t
= GetCurrentThread();
685 // A callback operation which does useful work (like record the flow) will
686 // likely take too long to execute in a signal handler.
687 if (t
&& t
->InSignalHandler()) {
688 // Record set of labels used in signal handler for completeness.
689 labels_in_signal_conditional
|= label
;
693 conditional_callback(label
, origin
);
696 } // namespace __dfsan
698 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void
699 __dfsan_conditional_callback_origin(dfsan_label label
, dfsan_origin origin
) {
700 __dfsan::ConditionalCallback(label
, origin
);
703 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void __dfsan_conditional_callback(
705 __dfsan::ConditionalCallback(label
, 0);
708 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void dfsan_set_conditional_callback(
709 __dfsan::dfsan_conditional_callback_t callback
) {
710 __dfsan::conditional_callback
= callback
;
713 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
714 dfsan_get_labels_in_signal_conditional() {
715 return __dfsan::labels_in_signal_conditional
;
720 typedef void (*dfsan_reaches_function_callback_t
)(dfsan_label label
,
724 const char *function
);
725 static dfsan_reaches_function_callback_t reaches_function_callback
= nullptr;
726 static dfsan_label labels_in_signal_reaches_function
= 0;
728 static void ReachesFunctionCallback(dfsan_label label
, dfsan_origin origin
,
729 const char *file
, unsigned int line
,
730 const char *function
) {
734 if (reaches_function_callback
== nullptr) {
738 // This initial ReachesFunctionCallback handler needs to be in here in dfsan
739 // runtime (rather than being an entirely user implemented hook) so that it
740 // has access to dfsan thread information.
741 DFsanThread
*t
= GetCurrentThread();
742 // A callback operation which does useful work (like record the flow) will
743 // likely take too long to execute in a signal handler.
744 if (t
&& t
->InSignalHandler()) {
745 // Record set of labels used in signal handler for completeness.
746 labels_in_signal_reaches_function
|= label
;
750 reaches_function_callback(label
, origin
, file
, line
, function
);
753 } // namespace __dfsan
755 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void
756 __dfsan_reaches_function_callback_origin(dfsan_label label
, dfsan_origin origin
,
757 const char *file
, unsigned int line
,
758 const char *function
) {
759 __dfsan::ReachesFunctionCallback(label
, origin
, file
, line
, function
);
762 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void
763 __dfsan_reaches_function_callback(dfsan_label label
, const char *file
,
764 unsigned int line
, const char *function
) {
765 __dfsan::ReachesFunctionCallback(label
, 0, file
, line
, function
);
768 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void
769 dfsan_set_reaches_function_callback(
770 __dfsan::dfsan_reaches_function_callback_t callback
) {
771 __dfsan::reaches_function_callback
= callback
;
774 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
775 dfsan_get_labels_in_signal_reaches_function() {
776 return __dfsan::labels_in_signal_reaches_function
;
779 class Decorator
: public __sanitizer::SanitizerCommonDecorator
{
781 Decorator() : SanitizerCommonDecorator() {}
782 const char *Origin() const { return Magenta(); }
787 void PrintNoOriginTrackingWarning() {
790 " %sDFSan: origin tracking is not enabled. Did you specify the "
791 "-dfsan-track-origins=1 option?%s\n",
792 d
.Warning(), d
.Default());
795 void PrintNoTaintWarning(const void *address
) {
797 Printf(" %sDFSan: no tainted value at %x%s\n", d
.Warning(), address
,
801 void PrintInvalidOriginWarning(dfsan_label label
, const void *address
) {
804 " %sTaint value 0x%x (at %p) has invalid origin tracking. This can "
805 "be a DFSan bug.%s\n",
806 d
.Warning(), label
, address
, d
.Default());
809 void PrintInvalidOriginIdWarning(dfsan_origin origin
) {
812 " %sOrigin Id %d has invalid origin tracking. This can "
813 "be a DFSan bug.%s\n",
814 d
.Warning(), origin
, d
.Default());
817 bool PrintOriginTraceFramesToStr(Origin o
, InternalScopedString
*out
) {
821 while (o
.isChainedOrigin()) {
823 dfsan_origin origin_id
= o
.raw_id();
824 o
= o
.getNextChainedOrigin(&stack
);
825 if (o
.isChainedOrigin())
827 " %sOrigin value: 0x%x, Taint value was stored to memory at%s\n",
828 d
.Origin(), origin_id
, d
.Default());
830 out
->AppendF(" %sOrigin value: 0x%x, Taint value was created at%s\n",
831 d
.Origin(), origin_id
, d
.Default());
833 // Includes a trailing newline, so no need to add it again.
841 bool PrintOriginTraceToStr(const void *addr
, const char *description
,
842 InternalScopedString
*out
) {
844 CHECK(dfsan_get_track_origins());
847 const dfsan_label label
= *__dfsan::shadow_for(addr
);
850 const dfsan_origin origin
= *__dfsan::origin_for(addr
);
852 out
->AppendF(" %sTaint value 0x%x (at %p) origin tracking (%s)%s\n",
853 d
.Origin(), label
, addr
, description
? description
: "",
856 Origin o
= Origin::FromRawId(origin
);
857 return PrintOriginTraceFramesToStr(o
, out
);
862 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void dfsan_print_origin_trace(
863 const void *addr
, const char *description
) {
864 if (!dfsan_get_track_origins()) {
865 PrintNoOriginTrackingWarning();
869 const dfsan_label label
= *__dfsan::shadow_for(addr
);
871 PrintNoTaintWarning(addr
);
875 InternalScopedString trace
;
876 bool success
= PrintOriginTraceToStr(addr
, description
, &trace
);
879 Printf("%s", trace
.data());
882 PrintInvalidOriginWarning(label
, addr
);
885 extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr
886 dfsan_sprint_origin_trace(const void *addr
, const char *description
,
887 char *out_buf
, uptr out_buf_size
) {
890 if (!dfsan_get_track_origins()) {
891 PrintNoOriginTrackingWarning();
895 const dfsan_label label
= *__dfsan::shadow_for(addr
);
897 PrintNoTaintWarning(addr
);
901 InternalScopedString trace
;
902 bool success
= PrintOriginTraceToStr(addr
, description
, &trace
);
905 PrintInvalidOriginWarning(label
, addr
);
910 internal_strncpy(out_buf
, trace
.data(), out_buf_size
- 1);
911 out_buf
[out_buf_size
- 1] = '\0';
914 return trace
.length();
917 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void dfsan_print_origin_id_trace(
918 dfsan_origin origin
) {
919 if (!dfsan_get_track_origins()) {
920 PrintNoOriginTrackingWarning();
923 Origin o
= Origin::FromRawId(origin
);
925 InternalScopedString trace
;
926 bool success
= PrintOriginTraceFramesToStr(o
, &trace
);
929 Printf("%s", trace
.data());
932 PrintInvalidOriginIdWarning(origin
);
935 extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr
dfsan_sprint_origin_id_trace(
936 dfsan_origin origin
, char *out_buf
, uptr out_buf_size
) {
939 if (!dfsan_get_track_origins()) {
940 PrintNoOriginTrackingWarning();
943 Origin o
= Origin::FromRawId(origin
);
945 InternalScopedString trace
;
946 bool success
= PrintOriginTraceFramesToStr(o
, &trace
);
949 PrintInvalidOriginIdWarning(origin
);
954 internal_strncpy(out_buf
, trace
.data(), out_buf_size
- 1);
955 out_buf
[out_buf_size
- 1] = '\0';
958 return trace
.length();
961 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin
962 dfsan_get_init_origin(const void *addr
) {
963 if (!dfsan_get_track_origins())
966 const dfsan_label label
= *__dfsan::shadow_for(addr
);
970 const dfsan_origin origin
= *__dfsan::origin_for(addr
);
972 Origin o
= Origin::FromRawId(origin
);
973 dfsan_origin origin_id
= o
.raw_id();
974 while (o
.isChainedOrigin()) {
976 origin_id
= o
.raw_id();
977 o
= o
.getNextChainedOrigin(&stack
);
982 void __sanitizer::BufferedStackTrace::UnwindImpl(uptr pc
, uptr bp
,
986 using namespace __dfsan
;
987 DFsanThread
*t
= GetCurrentThread();
988 if (!t
|| !StackTrace::WillUseFastUnwind(request_fast
)) {
989 return Unwind(max_depth
, pc
, bp
, context
, 0, 0, false);
991 Unwind(max_depth
, pc
, bp
, nullptr, t
->stack_top(), t
->stack_bottom(), true);
// Exported C entry point that captures a stack trace with
// GET_STORE_STACK_TRACE_PC_BP(pc, bp).
// NOTE(review): the lines that define pc/bp and that consume the captured
// trace are not visible in this listing -- confirm against the complete file.
994 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void __sanitizer_print_stack_trace() {
996 GET_STORE_STACK_TRACE_PC_BP(pc
, bp
);
// Captures a stack trace with GET_STORE_STACK_TRACE_PC_BP(pc, bp) and writes
// it into |out_buf| (capacity |out_buf_size|) through stack.PrintTo(), whose
// result is returned unchanged.
// NOTE(review): the lines preceding the capture macro (which presumably
// define pc and bp) are not visible in this listing.
1000 extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr
1001 dfsan_sprint_stack_trace(char *out_buf
, uptr out_buf_size
) {
1004 GET_STORE_STACK_TRACE_PC_BP(pc
, bp
);
1005 return stack
.PrintTo(out_buf
, out_buf_size
);
// Assigns every DFSan flag its default value. DFSAN_FLAG is defined so that
// each entry pulled in from dfsan_flags.inc expands to `Name = DefaultValue;`.
// NOTE(review): the end of the function is not visible in this listing.
1008 void Flags::SetDefaults() {
1009 #define DFSAN_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
1010 #include "dfsan_flags.inc"
// Registers every DFSan flag with |parser|, binding each one to the matching
// member of |f|. DFSAN_FLAG is defined so that each entry pulled in from
// dfsan_flags.inc expands to a RegisterFlag() call that uses the stringified
// flag name and its description.
// NOTE(review): the end of the function is not visible in this listing.
1014 static void RegisterDfsanFlags(FlagParser
*parser
, Flags
*f
) {
1015 #define DFSAN_FLAG(Type, Name, DefaultValue, Description) \
1016 RegisterFlag(parser, #Name, Description, &f->Name);
1017 #include "dfsan_flags.inc"
// Sets up common and DFSan-specific flags: installs the common defaults,
// overrides intercept_tls_get_addr to true, applies the DFSan defaults, then
// parses the DFSAN_OPTIONS environment variable with a parser that knows both
// flag sets. Unrecognized flags are reported when Verbosity() is non-zero and
// flag descriptions are printed when the common `help` flag is set.
// NOTE(review): the declarations of |cf| and |parser| are on lines absent
// from this listing.
1021 static void InitializeFlags() {
1022 SetCommonFlagsDefaults();
// Work on a copy of the common flags, then install the modified copy.
1025 cf
.CopyFrom(*common_flags());
1026 cf
.intercept_tls_get_addr
= true;
1027 OverrideCommonFlags(cf
);
1029 flags().SetDefaults();
// Both flag families are registered before the environment is parsed.
1032 RegisterCommonFlags(&parser
);
1033 RegisterDfsanFlags(&parser
, &flags());
1034 parser
.ParseStringFromEnv("DFSAN_OPTIONS");
1035 InitializeCommonFlags();
1036 if (Verbosity()) ReportUnrecognizedFlags();
1037 if (common_flags()->help
) parser
.PrintFlagDescriptions();
1040 SANITIZER_INTERFACE_ATTRIBUTE
1041 void dfsan_clear_arg_tls(uptr offset
, uptr size
) {
1042 internal_memset((void *)((uptr
)__dfsan_arg_tls
+ offset
), 0, size
);
1045 SANITIZER_INTERFACE_ATTRIBUTE
1046 void dfsan_clear_thread_local_state() {
1047 internal_memset(__dfsan_arg_tls
, 0, sizeof(__dfsan_arg_tls
));
1048 internal_memset(__dfsan_retval_tls
, 0, sizeof(__dfsan_retval_tls
));
1050 if (dfsan_get_track_origins()) {
1051 internal_memset(__dfsan_arg_origin_tls
, 0, sizeof(__dfsan_arg_origin_tls
));
1052 internal_memset(&__dfsan_retval_origin_tls
, 0,
1053 sizeof(__dfsan_retval_origin_tls
));
1057 SANITIZER_INTERFACE_ATTRIBUTE
1058 void dfsan_set_arg_tls(uptr offset
, dfsan_label label
) {
1059 // 2x to match ShadowTLSAlignment.
1060 // ShadowTLSAlignment should probably be changed.
1061 // TODO: Consider reducing ShadowTLSAlignment to 1.
1062 // Aligning to 2 bytes is probably a remnant of fast16 mode.
1063 ((dfsan_label
*)__dfsan_arg_tls
)[offset
* 2] = label
;
1066 SANITIZER_INTERFACE_ATTRIBUTE
1067 void dfsan_set_arg_origin_tls(uptr offset
, dfsan_origin o
) {
1068 __dfsan_arg_origin_tls
[offset
] = o
;
// Walks kMemoryLayout and calls MmapFixedSuperNoReserve() over the entries of
// type SHADOW or ORIGIN, printing a FATAL message if that call fails, then
// resets labels_in_signal_conditional and labels_in_signal_reaches_function
// to 0.
// NOTE(review): the statements that follow the two filtering `if`s and the
// failure message are absent from this listing (presumably skip / abort) --
// confirm against the complete file.
1071 extern "C" void dfsan_flush() {
1072 const uptr maxVirtualAddress
= GetMaxUserVirtualAddress();
1073 for (unsigned i
= 0; i
< kMemoryLayoutSize
; ++i
) {
1074 uptr start
= kMemoryLayout
[i
].start
;
1075 uptr end
= kMemoryLayout
[i
].end
;
1076 uptr size
= end
- start
;
1077 MappingDesc::Type type
= kMemoryLayout
[i
].type
;
// Only SHADOW and ORIGIN entries are of interest to the flush.
1079 if (type
!= MappingDesc::SHADOW
&& type
!= MappingDesc::ORIGIN
)
1082 // Check if the segment should be mapped based on platform constraints.
1083 if (start
>= maxVirtualAddress
)
// Map the range again at its fixed address.
1086 if (!MmapFixedSuperNoReserve(start
, size
, kMemoryLayout
[i
].name
)) {
1087 Printf("FATAL: DataFlowSanitizer: failed to clear memory region\n");
1091 __dfsan::labels_in_signal_conditional
= 0;
1092 __dfsan::labels_in_signal_reaches_function
= 0;
1095 // TODO: CheckMemoryLayoutSanity is based on msan.
1096 // Consider refactoring these into a shared implementation.
// Consistency checks over kMemoryLayout. For every entry it CHECKs that
// start < end, that the entry begins at |prev_end|, and that its first,
// middle and last addresses all classify as the entry's declared type. For
// APP entries it additionally CHECKs, for several sample addresses, that
// MEM_TO_SHADOW lands in shadow memory, MEM_TO_ORIGIN lands in origin memory,
// and that MEM_TO_ORIGIN agrees with SHADOW_TO_ORIGIN(MEM_TO_SHADOW(addr)).
// NOTE(review): the declaration/update of |prev_end| and the first and last
// assignments to |addr| are on lines absent from this listing.
1097 static void CheckMemoryLayoutSanity() {
1099 for (unsigned i
= 0; i
< kMemoryLayoutSize
; ++i
) {
1100 uptr start
= kMemoryLayout
[i
].start
;
1101 uptr end
= kMemoryLayout
[i
].end
;
1102 MappingDesc::Type type
= kMemoryLayout
[i
].type
;
1103 CHECK_LT(start
, end
);
// Entries must be laid out back to back.
1104 CHECK_EQ(prev_end
, start
);
1105 CHECK(addr_is_type(start
, type
));
1106 CHECK(addr_is_type((start
+ end
) / 2, type
));
1107 CHECK(addr_is_type(end
- 1, type
));
// Application ranges must translate to valid shadow and origin addresses.
1108 if (type
== MappingDesc::APP
) {
1110 CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr
)));
1111 CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr
)));
1112 CHECK_EQ(MEM_TO_ORIGIN(addr
), SHADOW_TO_ORIGIN(MEM_TO_SHADOW(addr
)));
// Same three checks for the midpoint of the range.
1114 addr
= (start
+ end
) / 2;
1115 CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr
)));
1116 CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr
)));
1117 CHECK_EQ(MEM_TO_ORIGIN(addr
), SHADOW_TO_ORIGIN(MEM_TO_SHADOW(addr
)));
1120 CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr
)));
1121 CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr
)));
1122 CHECK_EQ(MEM_TO_ORIGIN(addr
), SHADOW_TO_ORIGIN(MEM_TO_SHADOW(addr
)));
1128 // TODO: CheckMemoryRangeAvailability is based on msan.
1129 // Consider refactoring these into a shared implementation.
// Tests whether the inclusive range [beg, beg + size - 1] is available via
// MemoryRangeIsAvailable() and prints a FATAL message when it is not.
// NOTE(review): the return statements and any guard around the computation
// (line 1131 of the original file) are not visible in this listing; note that
// beg + size - 1 wraps when size is 0, so confirm how that case is handled.
1130 static bool CheckMemoryRangeAvailability(uptr beg
, uptr size
) {
// |end| is the last byte of the range, not one past it.
1132 uptr end
= beg
+ size
- 1;
1133 if (!MemoryRangeIsAvailable(beg
, end
)) {
1134 Printf("FATAL: Memory range %p - %p is not available.\n", beg
, end
);
1141 // TODO: ProtectMemoryRange is based on msan.
1142 // Consider refactoring these into a shared implementation.
// Maps [beg, beg + size) as inaccessible with MmapFixedNoAccess(). When the
// range starts at address 0 and the first attempt returns non-null, a second
// attempt is made after computing a gap of 16 pages. A FATAL message is
// printed if the address finally returned differs from |beg|.
// NOTE(review): the lines that apply |gap| to beg/size, the tail of the
// Printf call and the return statements are absent from this listing.
1143 static bool ProtectMemoryRange(uptr beg
, uptr size
, const char *name
) {
1145 void *addr
= MmapFixedNoAccess(beg
, size
, name
);
1146 if (beg
== 0 && addr
) {
1147 // Depending on the kernel configuration, we may not be able to protect
1148 // the page at address zero.
1149 uptr gap
= 16 * GetPageSizeCached();
1152 addr
= MmapFixedNoAccess(beg
, size
, name
);
// Success means the mapping landed exactly at the requested address.
1154 if ((uptr
)addr
!= beg
) {
1155 uptr end
= beg
+ size
- 1;
1156 Printf("FATAL: Cannot protect memory range %p - %p (%s).\n", beg
, end
,
1164 // TODO: InitShadow is based on msan.
1165 // Consider refactoring these into a shared implementation.
// Sets up the DFSan address-space layout. It logs the layout at verbosity 1,
// runs CheckMemoryLayoutSanity(), reports a FATAL error when dfsan_init's own
// address is not in application memory, and then classifies every
// kMemoryLayout entry:
//   - map:     SHADOW entries, plus ORIGIN entries when |init_origins| is set;
//   - protect: INVALID entries, plus ORIGIN entries when |init_origins| is
//              not set;
//   - neither: must be an APP entry (CHECKed).
// Mapped ranges go through CheckMemoryRangeAvailability(),
// MmapFixedSuperNoReserve() and, when use_madv_dontdump is set,
// DontDumpShadowMemory(); protected ranges go through
// CheckMemoryRangeAvailability() and ProtectMemoryRange().
// NOTE(review): the `if (map)` / `if (protect)` scaffolding, the statement
// following the maxVirtualAddress test and all return statements are on
// lines absent from this listing -- confirm against the complete file.
1166 bool InitShadow(bool init_origins
) {
1167 // Let user know mapping parameters first.
1168 VPrintf(1, "dfsan_init %p\n", (void *)&__dfsan::dfsan_init
);
1169 for (unsigned i
= 0; i
< kMemoryLayoutSize
; ++i
)
1170 VPrintf(1, "%s: %zx - %zx\n", kMemoryLayout
[i
].name
, kMemoryLayout
[i
].start
,
1171 kMemoryLayout
[i
].end
- 1);
1173 CheckMemoryLayoutSanity();
// The runtime's own code must live inside an application range.
1175 if (!MEM_IS_APP(&__dfsan::dfsan_init
)) {
1176 Printf("FATAL: Code %p is out of application range. Non-PIE build?\n",
1177 (uptr
)&__dfsan::dfsan_init
);
1181 const uptr maxVirtualAddress
= GetMaxUserVirtualAddress();
1183 for (unsigned i
= 0; i
< kMemoryLayoutSize
; ++i
) {
1184 uptr start
= kMemoryLayout
[i
].start
;
1185 uptr end
= kMemoryLayout
[i
].end
;
1186 uptr size
= end
- start
;
1187 MappingDesc::Type type
= kMemoryLayout
[i
].type
;
1189 // Check if the segment should be mapped based on platform constraints.
1190 if (start
>= maxVirtualAddress
)
// Decide what to do with this entry; the two outcomes are mutually exclusive.
1193 bool map
= type
== MappingDesc::SHADOW
||
1194 (init_origins
&& type
== MappingDesc::ORIGIN
);
1195 bool protect
= type
== MappingDesc::INVALID
||
1196 (!init_origins
&& type
== MappingDesc::ORIGIN
);
1197 CHECK(!(map
&& protect
));
1198 if (!map
&& !protect
)
1199 CHECK(type
== MappingDesc::APP
);
// Calls used for entries that are to be mapped.
1201 if (!CheckMemoryRangeAvailability(start
, size
))
1203 if (!MmapFixedSuperNoReserve(start
, size
, kMemoryLayout
[i
].name
))
1205 if (common_flags()->use_madv_dontdump
)
1206 DontDumpShadowMemory(start
, size
);
// Calls used for entries that are to be protected.
1209 if (!CheckMemoryRangeAvailability(start
, size
))
1211 if (!ProtectMemoryRange(start
, size
, kMemoryLayout
[i
].name
))
// Runtime initialization. It CHECKs that initialization is not already in
// progress, marks it as running, sets the tool name, and then (in this
// order, as far as this listing shows) calls AvoidCVE_2016_2143(),
// InitShadow() with the current origin-tracking setting,
// initialize_interceptors(), DFsanTSDInit(), dfsan_allocator_init(), and
// creates, installs and initializes the main DFsanThread. Finally it clears
// dfsan_init_is_running and sets dfsan_inited.
// NOTE(review): several lines of the original function are absent from this
// listing (between the visible statements), and the result of InitShadow()
// is not used on the visible line -- confirm against the complete file.
1219 static void DFsanInit(int argc
, char **argv
, char **envp
) {
1220 CHECK(!dfsan_init_is_running
);
1223 dfsan_init_is_running
= true;
1224 SanitizerToolName
= "DataflowSanitizer";
1226 AvoidCVE_2016_2143();
// Origin regions are set up according to the current tracking mode.
1232 InitShadow(dfsan_get_track_origins());
1234 initialize_interceptors();
1237 DFsanTSDInit(DFsanTSDDtor
);
1239 dfsan_allocator_init();
// Create the thread object for the thread running initialization.
1241 DFsanThread
*main_thread
= DFsanThread::Create(nullptr, nullptr);
1242 SetCurrentThread(main_thread
);
1243 main_thread
->Init();
1245 dfsan_init_is_running
= false;
1246 dfsan_inited
= true;
1251 void dfsan_init() { DFsanInit(0, nullptr, nullptr); }
1253 } // namespace __dfsan
1255 #if SANITIZER_CAN_USE_PREINIT_ARRAY
// Function pointer to DFsanInit placed in the ".preinit_array" section and
// marked `used` so it is kept even though nothing references it by name;
// presumably this makes the loader run DFsanInit during early startup --
// confirm against the platform's preinit_array semantics.
1256 __attribute__((section(".preinit_array"),
1257 used
)) static void (*dfsan_init_ptr
)(int, char **,
1258 char **) = DFsanInit
;