1 // SPDX-License-Identifier: GPL-2.0
5 * Copyright (C) 2019, Google LLC.
8 #include <linux/debug_locks.h>
9 #include <linux/delay.h>
10 #include <linux/jiffies.h>
11 #include <linux/kallsyms.h>
12 #include <linux/kernel.h>
13 #include <linux/lockdep.h>
14 #include <linux/preempt.h>
15 #include <linux/printk.h>
16 #include <linux/sched.h>
17 #include <linux/spinlock.h>
18 #include <linux/stacktrace.h>
24 * Max. number of stack entries to show in the report.
26 #define NUM_STACK_ENTRIES 64
28 /* Common access info. */
30 const volatile void *ptr
;
39 * Other thread info: communicated from other racing thread to thread that set
40 * up the watchpoint, which then prints the complete report atomically.
43 struct access_info ai
;
44 unsigned long stack_entries
[NUM_STACK_ENTRIES
];
45 int num_stack_entries
;
48 * Optionally pass @current. Typically we do not need to pass @current
49 * via @other_info since just @task_pid is sufficient. Passing @current
50 * has additional overhead.
52 * To safely pass @current, we must either use get_task_struct/
53 * put_task_struct, or stall the thread that populated @other_info.
55 * We cannot rely on get_task_struct/put_task_struct in case
56 * release_report() races with a task being released, and would have to
57 * free it in release_report(). This may result in deadlock if we want
58 * to use KCSAN on the allocators.
60 * Since we also want to reliably print held locks for
61 * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread
62 * that populated @other_info until it has been consumed.
64 struct task_struct
*task
;
68 * To never block any producers of struct other_info, we need as many elements
69 * as we have watchpoints (upper bound on concurrent races to report).
71 static struct other_info other_infos
[CONFIG_KCSAN_NUM_WATCHPOINTS
+ NUM_SLOTS
-1];
74 * Information about reported races; used to rate limit reporting.
78 * The last time the race was reported.
83 * The frames of the 2 threads; if only 1 thread is known, one frame
91 * Since we also want to be able to debug allocators with KCSAN, to avoid
92 * deadlock, report_times cannot be dynamically resized with krealloc in
95 * Therefore, we use a fixed-size array, which at most will occupy a page. This
96 * still adequately rate limits reports, assuming that a) number of unique data
97 * races is not excessive, and b) occurrence of unique races within the
98 * same time window is limited.
100 #define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time))
101 #define REPORT_TIMES_SIZE \
102 (CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ? \
104 CONFIG_KCSAN_REPORT_ONCE_IN_MS)
105 static struct report_time report_times
[REPORT_TIMES_SIZE
];
108 * Spinlock serializing report generation, and access to @other_infos. Although
109 * it could make sense to have a finer-grained locking story for @other_infos,
110 * report generation needs to be serialized either way, so not much is gained.
112 static DEFINE_RAW_SPINLOCK(report_lock
);
115 * Checks if the race identified by thread frames frame1 and frame2 has
116 * been reported since (now - KCSAN_REPORT_ONCE_IN_MS).
118 static bool rate_limit_report(unsigned long frame1
, unsigned long frame2
)
120 struct report_time
*use_entry
= &report_times
[0];
121 unsigned long invalid_before
;
124 BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS
!= 0 && REPORT_TIMES_SIZE
== 0);
126 if (CONFIG_KCSAN_REPORT_ONCE_IN_MS
== 0)
129 invalid_before
= jiffies
- msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS
);
131 /* Check if a matching race report exists. */
132 for (i
= 0; i
< REPORT_TIMES_SIZE
; ++i
) {
133 struct report_time
*rt
= &report_times
[i
];
136 * Must always select an entry for use to store info as we
137 * cannot resize report_times; at the end of the scan, use_entry
138 * will be the oldest entry, which ideally also happened before
139 * KCSAN_REPORT_ONCE_IN_MS ago.
141 if (time_before(rt
->time
, use_entry
->time
))
145 * Initially, no need to check any further as this entry as well
146 * as following entries have never been used.
151 /* Check if entry expired. */
152 if (time_before(rt
->time
, invalid_before
))
153 continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */
155 /* Reported recently, check if race matches. */
156 if ((rt
->frame1
== frame1
&& rt
->frame2
== frame2
) ||
157 (rt
->frame1
== frame2
&& rt
->frame2
== frame1
))
161 use_entry
->time
= jiffies
;
162 use_entry
->frame1
= frame1
;
163 use_entry
->frame2
= frame2
;
168 * Special rules to skip reporting.
171 skip_report(enum kcsan_value_change value_change
, unsigned long top_frame
)
173 /* Should never get here if value_change==FALSE. */
174 WARN_ON_ONCE(value_change
== KCSAN_VALUE_CHANGE_FALSE
);
177 * The first call to skip_report always has value_change==TRUE, since we
178 * cannot know the value written of an instrumented access. For the 2nd
179 * call there are 6 cases with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY:
181 * 1. read watchpoint, conflicting write (value_change==TRUE): report;
182 * 2. read watchpoint, conflicting write (value_change==MAYBE): skip;
183 * 3. write watchpoint, conflicting write (value_change==TRUE): report;
184 * 4. write watchpoint, conflicting write (value_change==MAYBE): skip;
185 * 5. write watchpoint, conflicting read (value_change==MAYBE): skip;
186 * 6. write watchpoint, conflicting read (value_change==TRUE): report;
188 * Cases 1-4 are intuitive and expected; case 5 ensures we do not report
189 * data races where the write may have rewritten the same value; case 6
190 * is possible either if the size is larger than what we check value
191 * changes for or the access type is KCSAN_ACCESS_ASSERT.
193 if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY
) &&
194 value_change
== KCSAN_VALUE_CHANGE_MAYBE
) {
196 * The access is a write, but the data value did not change.
198 * We opt-out of this filter for certain functions at request of
202 int len
= scnprintf(buf
, sizeof(buf
), "%ps", (void *)top_frame
);
204 if (!strnstr(buf
, "rcu_", len
) &&
205 !strnstr(buf
, "_rcu", len
) &&
206 !strnstr(buf
, "_srcu", len
))
210 return kcsan_skip_report_debugfs(top_frame
);
213 static const char *get_access_type(int type
)
215 if (type
& KCSAN_ACCESS_ASSERT
) {
216 if (type
& KCSAN_ACCESS_SCOPED
) {
217 if (type
& KCSAN_ACCESS_WRITE
)
218 return "assert no accesses (reordered)";
220 return "assert no writes (reordered)";
222 if (type
& KCSAN_ACCESS_WRITE
)
223 return "assert no accesses";
225 return "assert no writes";
232 case KCSAN_ACCESS_ATOMIC
:
233 return "read (marked)";
234 case KCSAN_ACCESS_WRITE
:
236 case KCSAN_ACCESS_WRITE
| KCSAN_ACCESS_ATOMIC
:
237 return "write (marked)";
238 case KCSAN_ACCESS_COMPOUND
| KCSAN_ACCESS_WRITE
:
240 case KCSAN_ACCESS_COMPOUND
| KCSAN_ACCESS_WRITE
| KCSAN_ACCESS_ATOMIC
:
241 return "read-write (marked)";
242 case KCSAN_ACCESS_SCOPED
:
243 return "read (reordered)";
244 case KCSAN_ACCESS_SCOPED
| KCSAN_ACCESS_ATOMIC
:
245 return "read (marked, reordered)";
246 case KCSAN_ACCESS_SCOPED
| KCSAN_ACCESS_WRITE
:
247 return "write (reordered)";
248 case KCSAN_ACCESS_SCOPED
| KCSAN_ACCESS_WRITE
| KCSAN_ACCESS_ATOMIC
:
249 return "write (marked, reordered)";
250 case KCSAN_ACCESS_SCOPED
| KCSAN_ACCESS_COMPOUND
| KCSAN_ACCESS_WRITE
:
251 return "read-write (reordered)";
252 case KCSAN_ACCESS_SCOPED
| KCSAN_ACCESS_COMPOUND
| KCSAN_ACCESS_WRITE
| KCSAN_ACCESS_ATOMIC
:
253 return "read-write (marked, reordered)";
259 static const char *get_bug_type(int type
)
261 return (type
& KCSAN_ACCESS_ASSERT
) != 0 ? "assert: race" : "data-race";
264 /* Return thread description: in task or interrupt. */
265 static const char *get_thread_desc(int task_id
)
268 static char buf
[32]; /* safe: protected by report_lock */
270 snprintf(buf
, sizeof(buf
), "task %i", task_id
);
276 /* Helper to skip KCSAN-related functions in stack-trace. */
277 static int get_stack_skipnr(const unsigned long stack_entries
[], int num_entries
)
283 for (skip
= 0; skip
< num_entries
; ++skip
) {
284 len
= scnprintf(buf
, sizeof(buf
), "%ps", (void *)stack_entries
[skip
]);
286 /* Never show tsan_* or {read,write}_once_size. */
287 if (strnstr(buf
, "tsan_", len
) ||
288 strnstr(buf
, "_once_size", len
))
291 cur
= strnstr(buf
, "kcsan_", len
);
293 cur
+= strlen("kcsan_");
294 if (!str_has_prefix(cur
, "test"))
295 continue; /* KCSAN runtime function. */
296 /* KCSAN related test. */
300 * No match for runtime functions -- @skip entries to skip to
301 * get to first frame of interest.
310 * Skips to the first entry that matches the function of @ip, and then replaces
311 * that entry with @ip, returning the entries to skip with @replaced containing
312 * the replaced entry.
315 replace_stack_entry(unsigned long stack_entries
[], int num_entries
, unsigned long ip
,
316 unsigned long *replaced
)
318 unsigned long symbolsize
, offset
;
319 unsigned long target_func
;
322 if (kallsyms_lookup_size_offset(ip
, &symbolsize
, &offset
))
323 target_func
= ip
- offset
;
327 for (skip
= 0; skip
< num_entries
; ++skip
) {
328 unsigned long func
= stack_entries
[skip
];
330 if (!kallsyms_lookup_size_offset(func
, &symbolsize
, &offset
))
334 if (func
== target_func
) {
335 *replaced
= stack_entries
[skip
];
336 stack_entries
[skip
] = ip
;
342 /* Should not happen; the resulting stack trace is likely misleading. */
343 WARN_ONCE(1, "Cannot find frame for %pS in stack trace", (void *)ip
);
344 return get_stack_skipnr(stack_entries
, num_entries
);
348 sanitize_stack_entries(unsigned long stack_entries
[], int num_entries
, unsigned long ip
,
349 unsigned long *replaced
)
351 return ip
? replace_stack_entry(stack_entries
, num_entries
, ip
, replaced
) :
352 get_stack_skipnr(stack_entries
, num_entries
);
355 /* Compares symbolized strings of addr1 and addr2. */
356 static int sym_strcmp(void *addr1
, void *addr2
)
361 snprintf(buf1
, sizeof(buf1
), "%pS", addr1
);
362 snprintf(buf2
, sizeof(buf2
), "%pS", addr2
);
364 return strncmp(buf1
, buf2
, sizeof(buf1
));
368 print_stack_trace(unsigned long stack_entries
[], int num_entries
, unsigned long reordered_to
)
370 stack_trace_print(stack_entries
, num_entries
, 0);
372 pr_err(" |\n +-> reordered to: %pS\n", (void *)reordered_to
);
375 static void print_verbose_info(struct task_struct
*task
)
380 /* Restore IRQ state trace for printing. */
381 kcsan_restore_irqtrace(task
);
384 debug_show_held_locks(task
);
385 print_irqtrace_events(task
);
388 static void print_report(enum kcsan_value_change value_change
,
389 const struct access_info
*ai
,
390 struct other_info
*other_info
,
391 u64 old
, u64
new, u64 mask
)
393 unsigned long reordered_to
= 0;
394 unsigned long stack_entries
[NUM_STACK_ENTRIES
] = { 0 };
395 int num_stack_entries
= stack_trace_save(stack_entries
, NUM_STACK_ENTRIES
, 1);
396 int skipnr
= sanitize_stack_entries(stack_entries
, num_stack_entries
, ai
->ip
, &reordered_to
);
397 unsigned long this_frame
= stack_entries
[skipnr
];
398 unsigned long other_reordered_to
= 0;
399 unsigned long other_frame
= 0;
400 int other_skipnr
= 0; /* silence uninit warnings */
403 * Must check report filter rules before starting to print.
405 if (skip_report(KCSAN_VALUE_CHANGE_TRUE
, stack_entries
[skipnr
]))
409 other_skipnr
= sanitize_stack_entries(other_info
->stack_entries
,
410 other_info
->num_stack_entries
,
411 other_info
->ai
.ip
, &other_reordered_to
);
412 other_frame
= other_info
->stack_entries
[other_skipnr
];
414 /* @value_change is only known for the other thread */
415 if (skip_report(value_change
, other_frame
))
419 if (rate_limit_report(this_frame
, other_frame
))
422 /* Print report header. */
423 pr_err("==================================================================\n");
428 * Order functions lexographically for consistent bug titles.
429 * Do not print offset of functions to keep title short.
431 cmp
= sym_strcmp((void *)other_frame
, (void *)this_frame
);
432 pr_err("BUG: KCSAN: %s in %ps / %ps\n",
433 get_bug_type(ai
->access_type
| other_info
->ai
.access_type
),
434 (void *)(cmp
< 0 ? other_frame
: this_frame
),
435 (void *)(cmp
< 0 ? this_frame
: other_frame
));
437 pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(ai
->access_type
),
443 /* Print information about the racing accesses. */
445 pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
446 get_access_type(other_info
->ai
.access_type
), other_info
->ai
.ptr
,
447 other_info
->ai
.size
, get_thread_desc(other_info
->ai
.task_pid
),
448 other_info
->ai
.cpu_id
);
450 /* Print the other thread's stack trace. */
451 print_stack_trace(other_info
->stack_entries
+ other_skipnr
,
452 other_info
->num_stack_entries
- other_skipnr
,
454 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE
))
455 print_verbose_info(other_info
->task
);
458 pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
459 get_access_type(ai
->access_type
), ai
->ptr
, ai
->size
,
460 get_thread_desc(ai
->task_pid
), ai
->cpu_id
);
462 pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n",
463 get_access_type(ai
->access_type
), ai
->ptr
, ai
->size
,
464 get_thread_desc(ai
->task_pid
), ai
->cpu_id
);
466 /* Print stack trace of this thread. */
467 print_stack_trace(stack_entries
+ skipnr
, num_stack_entries
- skipnr
, reordered_to
);
468 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE
))
469 print_verbose_info(current
);
471 /* Print observed value change. */
473 int hex_len
= ai
->size
* 2;
474 u64 diff
= old
^ new;
480 pr_err("value changed: 0x%0*llx -> 0x%0*llx\n",
481 hex_len
, old
, hex_len
, new);
483 pr_err(" bits changed: 0x%0*llx with mask 0x%0*llx\n",
484 hex_len
, diff
, hex_len
, mask
);
489 /* Print report footer. */
491 pr_err("Reported by Kernel Concurrency Sanitizer on:\n");
492 dump_stack_print_info(KERN_DEFAULT
);
493 pr_err("==================================================================\n");
495 check_panic_on_warn("KCSAN");
498 static void release_report(unsigned long *flags
, struct other_info
*other_info
)
501 * Use size to denote valid/invalid, since KCSAN entirely ignores
504 other_info
->ai
.size
= 0;
505 raw_spin_unlock_irqrestore(&report_lock
, *flags
);
509 * Sets @other_info->task and awaits consumption of @other_info.
511 * Precondition: report_lock is held.
512 * Postcondition: report_lock is held.
514 static void set_other_info_task_blocking(unsigned long *flags
,
515 const struct access_info
*ai
,
516 struct other_info
*other_info
)
519 * We may be instrumenting a code-path where current->state is already
520 * something other than TASK_RUNNING.
522 const bool is_running
= task_is_running(current
);
524 * To avoid deadlock in case we are in an interrupt here and this is a
525 * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a
526 * timeout to ensure this works in all contexts.
528 * Await approximately the worst case delay of the reporting thread (if
529 * we are not interrupted).
531 int timeout
= max(kcsan_udelay_task
, kcsan_udelay_interrupt
);
533 other_info
->task
= current
;
537 * Let lockdep know the real task is sleeping, to print
538 * the held locks (recall we turned lockdep off, so
539 * locking/unlocking @report_lock won't be recorded).
541 set_current_state(TASK_UNINTERRUPTIBLE
);
543 raw_spin_unlock_irqrestore(&report_lock
, *flags
);
545 * We cannot call schedule() since we also cannot reliably
546 * determine if sleeping here is permitted -- see in_atomic().
550 raw_spin_lock_irqsave(&report_lock
, *flags
);
553 * Abort. Reset @other_info->task to NULL, since it
554 * appears the other thread is still going to consume
555 * it. It will result in no verbose info printed for
558 other_info
->task
= NULL
;
562 * If invalid, or @ptr nor @current matches, then @other_info
563 * has been consumed and we may continue. If not, retry.
565 } while (other_info
->ai
.size
&& other_info
->ai
.ptr
== ai
->ptr
&&
566 other_info
->task
== current
);
568 set_current_state(TASK_RUNNING
);
571 /* Populate @other_info; requires that the provided @other_info not in use. */
572 static void prepare_report_producer(unsigned long *flags
,
573 const struct access_info
*ai
,
574 struct other_info
*other_info
)
576 raw_spin_lock_irqsave(&report_lock
, *flags
);
579 * The same @other_infos entry cannot be used concurrently, because
580 * there is a one-to-one mapping to watchpoint slots (@watchpoints in
581 * core.c), and a watchpoint is only released for reuse after reporting
582 * is done by the consumer of @other_info. Therefore, it is impossible
583 * for another concurrent prepare_report_producer() to set the same
584 * @other_info, and are guaranteed exclusivity for the @other_infos
585 * entry pointed to by @other_info.
587 * To check this property holds, size should never be non-zero here,
588 * because every consumer of struct other_info resets size to 0 in
591 WARN_ON(other_info
->ai
.size
);
593 other_info
->ai
= *ai
;
594 other_info
->num_stack_entries
= stack_trace_save(other_info
->stack_entries
, NUM_STACK_ENTRIES
, 2);
596 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE
))
597 set_other_info_task_blocking(flags
, ai
, other_info
);
599 raw_spin_unlock_irqrestore(&report_lock
, *flags
);
602 /* Awaits producer to fill @other_info and then returns. */
603 static bool prepare_report_consumer(unsigned long *flags
,
604 const struct access_info
*ai
,
605 struct other_info
*other_info
)
608 raw_spin_lock_irqsave(&report_lock
, *flags
);
609 while (!other_info
->ai
.size
) { /* Await valid @other_info. */
610 raw_spin_unlock_irqrestore(&report_lock
, *flags
);
612 raw_spin_lock_irqsave(&report_lock
, *flags
);
615 /* Should always have a matching access based on watchpoint encoding. */
616 if (WARN_ON(!matching_access((unsigned long)other_info
->ai
.ptr
& WATCHPOINT_ADDR_MASK
, other_info
->ai
.size
,
617 (unsigned long)ai
->ptr
& WATCHPOINT_ADDR_MASK
, ai
->size
)))
620 if (!matching_access((unsigned long)other_info
->ai
.ptr
, other_info
->ai
.size
,
621 (unsigned long)ai
->ptr
, ai
->size
)) {
623 * If the actual accesses to not match, this was a false
624 * positive due to watchpoint encoding.
626 atomic_long_inc(&kcsan_counters
[KCSAN_COUNTER_ENCODING_FALSE_POSITIVES
]);
633 release_report(flags
, other_info
);
637 static struct access_info
prepare_access_info(const volatile void *ptr
, size_t size
,
638 int access_type
, unsigned long ip
)
640 return (struct access_info
) {
643 .access_type
= access_type
,
644 .task_pid
= in_task() ? task_pid_nr(current
) : -1,
645 .cpu_id
= raw_smp_processor_id(),
646 /* Only replace stack entry with @ip if scoped access. */
647 .ip
= (access_type
& KCSAN_ACCESS_SCOPED
) ? ip
: 0,
651 void kcsan_report_set_info(const volatile void *ptr
, size_t size
, int access_type
,
652 unsigned long ip
, int watchpoint_idx
)
654 const struct access_info ai
= prepare_access_info(ptr
, size
, access_type
, ip
);
657 kcsan_disable_current();
658 lockdep_off(); /* See kcsan_report_known_origin(). */
660 prepare_report_producer(&flags
, &ai
, &other_infos
[watchpoint_idx
]);
663 kcsan_enable_current();
666 void kcsan_report_known_origin(const volatile void *ptr
, size_t size
, int access_type
,
667 unsigned long ip
, enum kcsan_value_change value_change
,
668 int watchpoint_idx
, u64 old
, u64
new, u64 mask
)
670 const struct access_info ai
= prepare_access_info(ptr
, size
, access_type
, ip
);
671 struct other_info
*other_info
= &other_infos
[watchpoint_idx
];
672 unsigned long flags
= 0;
674 kcsan_disable_current();
676 * Because we may generate reports when we're in scheduler code, the use
677 * of printk() could deadlock. Until such time that all printing code
678 * called in print_report() is scheduler-safe, accept the risk, and just
679 * get our message out. As such, also disable lockdep to hide the
680 * warning, and avoid disabling lockdep for the rest of the kernel.
684 if (!prepare_report_consumer(&flags
, &ai
, other_info
))
687 * Never report if value_change is FALSE, only when it is
688 * either TRUE or MAYBE. In case of MAYBE, further filtering may
689 * be done once we know the full stack trace in print_report().
691 if (value_change
!= KCSAN_VALUE_CHANGE_FALSE
)
692 print_report(value_change
, &ai
, other_info
, old
, new, mask
);
694 release_report(&flags
, other_info
);
697 kcsan_enable_current();
700 void kcsan_report_unknown_origin(const volatile void *ptr
, size_t size
, int access_type
,
701 unsigned long ip
, u64 old
, u64
new, u64 mask
)
703 const struct access_info ai
= prepare_access_info(ptr
, size
, access_type
, ip
);
706 kcsan_disable_current();
707 lockdep_off(); /* See kcsan_report_known_origin(). */
709 raw_spin_lock_irqsave(&report_lock
, flags
);
710 print_report(KCSAN_VALUE_CHANGE_TRUE
, &ai
, NULL
, old
, new, mask
);
711 raw_spin_unlock_irqrestore(&report_lock
, flags
);
714 kcsan_enable_current();