1 // SPDX-License-Identifier: GPL-2.0
3 #include <linux/debug_locks.h>
4 #include <linux/delay.h>
5 #include <linux/jiffies.h>
6 #include <linux/kernel.h>
7 #include <linux/lockdep.h>
8 #include <linux/preempt.h>
9 #include <linux/printk.h>
10 #include <linux/sched.h>
11 #include <linux/spinlock.h>
12 #include <linux/stacktrace.h>
18 * Max. number of stack entries to show in the report.
20 #define NUM_STACK_ENTRIES 64
22 /* Common access info. */
24 const volatile void *ptr
;
32 * Other thread info: communicated from other racing thread to thread that set
33 * up the watchpoint, which then prints the complete report atomically.
36 struct access_info ai
;
37 unsigned long stack_entries
[NUM_STACK_ENTRIES
];
38 int num_stack_entries
;
41 * Optionally pass @current. Typically we do not need to pass @current
42 * via @other_info since just @task_pid is sufficient. Passing @current
43 * has additional overhead.
45 * To safely pass @current, we must either use get_task_struct/
46 * put_task_struct, or stall the thread that populated @other_info.
48 * We cannot rely on get_task_struct/put_task_struct in case
49 * release_report() races with a task being released, and would have to
50 * free it in release_report(). This may result in deadlock if we want
51 * to use KCSAN on the allocators.
53 * Since we also want to reliably print held locks for
54 * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread
55 * that populated @other_info until it has been consumed.
57 struct task_struct
*task
;
61 * To never block any producers of struct other_info, we need as many elements
62 * as we have watchpoints (upper bound on concurrent races to report).
64 static struct other_info other_infos
[CONFIG_KCSAN_NUM_WATCHPOINTS
+ NUM_SLOTS
-1];
67 * Information about reported races; used to rate limit reporting.
71 * The last time the race was reported.
76 * The frames of the 2 threads; if only 1 thread is known, one frame
84 * Since we also want to be able to debug allocators with KCSAN, to avoid
85 * deadlock, report_times cannot be dynamically resized with krealloc in
88 * Therefore, we use a fixed-size array, which at most will occupy a page. This
89 * still adequately rate limits reports, assuming that a) number of unique data
90 * races is not excessive, and b) occurrence of unique races within the
91 * same time window is limited.
93 #define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time))
94 #define REPORT_TIMES_SIZE \
95 (CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ? \
97 CONFIG_KCSAN_REPORT_ONCE_IN_MS)
98 static struct report_time report_times
[REPORT_TIMES_SIZE
];
101 * Spinlock serializing report generation, and access to @other_infos. Although
102 * it could make sense to have a finer-grained locking story for @other_infos,
103 * report generation needs to be serialized either way, so not much is gained.
105 static DEFINE_RAW_SPINLOCK(report_lock
);
108 * Checks if the race identified by thread frames frame1 and frame2 has
109 * been reported since (now - KCSAN_REPORT_ONCE_IN_MS).
111 static bool rate_limit_report(unsigned long frame1
, unsigned long frame2
)
113 struct report_time
*use_entry
= &report_times
[0];
114 unsigned long invalid_before
;
117 BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS
!= 0 && REPORT_TIMES_SIZE
== 0);
119 if (CONFIG_KCSAN_REPORT_ONCE_IN_MS
== 0)
122 invalid_before
= jiffies
- msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS
);
124 /* Check if a matching race report exists. */
125 for (i
= 0; i
< REPORT_TIMES_SIZE
; ++i
) {
126 struct report_time
*rt
= &report_times
[i
];
129 * Must always select an entry for use to store info as we
130 * cannot resize report_times; at the end of the scan, use_entry
131 * will be the oldest entry, which ideally also happened before
132 * KCSAN_REPORT_ONCE_IN_MS ago.
134 if (time_before(rt
->time
, use_entry
->time
))
138 * Initially, no need to check any further as this entry as well
139 * as following entries have never been used.
144 /* Check if entry expired. */
145 if (time_before(rt
->time
, invalid_before
))
146 continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */
148 /* Reported recently, check if race matches. */
149 if ((rt
->frame1
== frame1
&& rt
->frame2
== frame2
) ||
150 (rt
->frame1
== frame2
&& rt
->frame2
== frame1
))
154 use_entry
->time
= jiffies
;
155 use_entry
->frame1
= frame1
;
156 use_entry
->frame2
= frame2
;
161 * Special rules to skip reporting.
164 skip_report(enum kcsan_value_change value_change
, unsigned long top_frame
)
166 /* Should never get here if value_change==FALSE. */
167 WARN_ON_ONCE(value_change
== KCSAN_VALUE_CHANGE_FALSE
);
170 * The first call to skip_report always has value_change==TRUE, since we
171 * cannot know the value written of an instrumented access. For the 2nd
172 * call there are 6 cases with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY:
174 * 1. read watchpoint, conflicting write (value_change==TRUE): report;
175 * 2. read watchpoint, conflicting write (value_change==MAYBE): skip;
176 * 3. write watchpoint, conflicting write (value_change==TRUE): report;
177 * 4. write watchpoint, conflicting write (value_change==MAYBE): skip;
178 * 5. write watchpoint, conflicting read (value_change==MAYBE): skip;
179 * 6. write watchpoint, conflicting read (value_change==TRUE): report;
181 * Cases 1-4 are intuitive and expected; case 5 ensures we do not report
182 * data races where the write may have rewritten the same value; case 6
183 * is possible either if the size is larger than what we check value
184 * changes for or the access type is KCSAN_ACCESS_ASSERT.
186 if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY
) &&
187 value_change
== KCSAN_VALUE_CHANGE_MAYBE
) {
189 * The access is a write, but the data value did not change.
191 * We opt-out of this filter for certain functions at request of
195 int len
= scnprintf(buf
, sizeof(buf
), "%ps", (void *)top_frame
);
197 if (!strnstr(buf
, "rcu_", len
) &&
198 !strnstr(buf
, "_rcu", len
) &&
199 !strnstr(buf
, "_srcu", len
))
203 return kcsan_skip_report_debugfs(top_frame
);
206 static const char *get_access_type(int type
)
208 if (type
& KCSAN_ACCESS_ASSERT
) {
209 if (type
& KCSAN_ACCESS_SCOPED
) {
210 if (type
& KCSAN_ACCESS_WRITE
)
211 return "assert no accesses (scoped)";
213 return "assert no writes (scoped)";
215 if (type
& KCSAN_ACCESS_WRITE
)
216 return "assert no accesses";
218 return "assert no writes";
225 case KCSAN_ACCESS_ATOMIC
:
226 return "read (marked)";
227 case KCSAN_ACCESS_WRITE
:
229 case KCSAN_ACCESS_WRITE
| KCSAN_ACCESS_ATOMIC
:
230 return "write (marked)";
231 case KCSAN_ACCESS_COMPOUND
| KCSAN_ACCESS_WRITE
:
233 case KCSAN_ACCESS_COMPOUND
| KCSAN_ACCESS_WRITE
| KCSAN_ACCESS_ATOMIC
:
234 return "read-write (marked)";
235 case KCSAN_ACCESS_SCOPED
:
236 return "read (scoped)";
237 case KCSAN_ACCESS_SCOPED
| KCSAN_ACCESS_ATOMIC
:
238 return "read (marked, scoped)";
239 case KCSAN_ACCESS_SCOPED
| KCSAN_ACCESS_WRITE
:
240 return "write (scoped)";
241 case KCSAN_ACCESS_SCOPED
| KCSAN_ACCESS_WRITE
| KCSAN_ACCESS_ATOMIC
:
242 return "write (marked, scoped)";
248 static const char *get_bug_type(int type
)
250 return (type
& KCSAN_ACCESS_ASSERT
) != 0 ? "assert: race" : "data-race";
253 /* Return thread description: in task or interrupt. */
254 static const char *get_thread_desc(int task_id
)
257 static char buf
[32]; /* safe: protected by report_lock */
259 snprintf(buf
, sizeof(buf
), "task %i", task_id
);
265 /* Helper to skip KCSAN-related functions in stack-trace. */
266 static int get_stack_skipnr(const unsigned long stack_entries
[], int num_entries
)
272 for (skip
= 0; skip
< num_entries
; ++skip
) {
273 len
= scnprintf(buf
, sizeof(buf
), "%ps", (void *)stack_entries
[skip
]);
275 /* Never show tsan_* or {read,write}_once_size. */
276 if (strnstr(buf
, "tsan_", len
) ||
277 strnstr(buf
, "_once_size", len
))
280 cur
= strnstr(buf
, "kcsan_", len
);
282 cur
+= strlen("kcsan_");
283 if (!str_has_prefix(cur
, "test"))
284 continue; /* KCSAN runtime function. */
285 /* KCSAN related test. */
289 * No match for runtime functions -- @skip entries to skip to
290 * get to first frame of interest.
298 /* Compares symbolized strings of addr1 and addr2. */
299 static int sym_strcmp(void *addr1
, void *addr2
)
304 snprintf(buf1
, sizeof(buf1
), "%pS", addr1
);
305 snprintf(buf2
, sizeof(buf2
), "%pS", addr2
);
307 return strncmp(buf1
, buf2
, sizeof(buf1
));
310 static void print_verbose_info(struct task_struct
*task
)
315 /* Restore IRQ state trace for printing. */
316 kcsan_restore_irqtrace(task
);
319 debug_show_held_locks(task
);
320 print_irqtrace_events(task
);
324 * Returns true if a report was generated, false otherwise.
326 static bool print_report(enum kcsan_value_change value_change
,
327 enum kcsan_report_type type
,
328 const struct access_info
*ai
,
329 const struct other_info
*other_info
)
331 unsigned long stack_entries
[NUM_STACK_ENTRIES
] = { 0 };
332 int num_stack_entries
= stack_trace_save(stack_entries
, NUM_STACK_ENTRIES
, 1);
333 int skipnr
= get_stack_skipnr(stack_entries
, num_stack_entries
);
334 unsigned long this_frame
= stack_entries
[skipnr
];
335 unsigned long other_frame
= 0;
336 int other_skipnr
= 0; /* silence uninit warnings */
339 * Must check report filter rules before starting to print.
341 if (skip_report(KCSAN_VALUE_CHANGE_TRUE
, stack_entries
[skipnr
]))
344 if (type
== KCSAN_REPORT_RACE_SIGNAL
) {
345 other_skipnr
= get_stack_skipnr(other_info
->stack_entries
,
346 other_info
->num_stack_entries
);
347 other_frame
= other_info
->stack_entries
[other_skipnr
];
349 /* @value_change is only known for the other thread */
350 if (skip_report(value_change
, other_frame
))
354 if (rate_limit_report(this_frame
, other_frame
))
357 /* Print report header. */
358 pr_err("==================================================================\n");
360 case KCSAN_REPORT_RACE_SIGNAL
: {
364 * Order functions lexographically for consistent bug titles.
365 * Do not print offset of functions to keep title short.
367 cmp
= sym_strcmp((void *)other_frame
, (void *)this_frame
);
368 pr_err("BUG: KCSAN: %s in %ps / %ps\n",
369 get_bug_type(ai
->access_type
| other_info
->ai
.access_type
),
370 (void *)(cmp
< 0 ? other_frame
: this_frame
),
371 (void *)(cmp
< 0 ? this_frame
: other_frame
));
374 case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
:
375 pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(ai
->access_type
),
385 /* Print information about the racing accesses. */
387 case KCSAN_REPORT_RACE_SIGNAL
:
388 pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
389 get_access_type(other_info
->ai
.access_type
), other_info
->ai
.ptr
,
390 other_info
->ai
.size
, get_thread_desc(other_info
->ai
.task_pid
),
391 other_info
->ai
.cpu_id
);
393 /* Print the other thread's stack trace. */
394 stack_trace_print(other_info
->stack_entries
+ other_skipnr
,
395 other_info
->num_stack_entries
- other_skipnr
,
398 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE
))
399 print_verbose_info(other_info
->task
);
402 pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
403 get_access_type(ai
->access_type
), ai
->ptr
, ai
->size
,
404 get_thread_desc(ai
->task_pid
), ai
->cpu_id
);
407 case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
:
408 pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n",
409 get_access_type(ai
->access_type
), ai
->ptr
, ai
->size
,
410 get_thread_desc(ai
->task_pid
), ai
->cpu_id
);
416 /* Print stack trace of this thread. */
417 stack_trace_print(stack_entries
+ skipnr
, num_stack_entries
- skipnr
,
420 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE
))
421 print_verbose_info(current
);
423 /* Print report footer. */
425 pr_err("Reported by Kernel Concurrency Sanitizer on:\n");
426 dump_stack_print_info(KERN_DEFAULT
);
427 pr_err("==================================================================\n");
432 static void release_report(unsigned long *flags
, struct other_info
*other_info
)
436 * Use size to denote valid/invalid, since KCSAN entirely
437 * ignores 0-sized accesses.
439 other_info
->ai
.size
= 0;
441 raw_spin_unlock_irqrestore(&report_lock
, *flags
);
445 * Sets @other_info->task and awaits consumption of @other_info.
447 * Precondition: report_lock is held.
448 * Postcondition: report_lock is held.
450 static void set_other_info_task_blocking(unsigned long *flags
,
451 const struct access_info
*ai
,
452 struct other_info
*other_info
)
455 * We may be instrumenting a code-path where current->state is already
456 * something other than TASK_RUNNING.
458 const bool is_running
= current
->state
== TASK_RUNNING
;
460 * To avoid deadlock in case we are in an interrupt here and this is a
461 * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a
462 * timeout to ensure this works in all contexts.
464 * Await approximately the worst case delay of the reporting thread (if
465 * we are not interrupted).
467 int timeout
= max(kcsan_udelay_task
, kcsan_udelay_interrupt
);
469 other_info
->task
= current
;
473 * Let lockdep know the real task is sleeping, to print
474 * the held locks (recall we turned lockdep off, so
475 * locking/unlocking @report_lock won't be recorded).
477 set_current_state(TASK_UNINTERRUPTIBLE
);
479 raw_spin_unlock_irqrestore(&report_lock
, *flags
);
481 * We cannot call schedule() since we also cannot reliably
482 * determine if sleeping here is permitted -- see in_atomic().
486 raw_spin_lock_irqsave(&report_lock
, *flags
);
489 * Abort. Reset @other_info->task to NULL, since it
490 * appears the other thread is still going to consume
491 * it. It will result in no verbose info printed for
494 other_info
->task
= NULL
;
498 * If invalid, or @ptr nor @current matches, then @other_info
499 * has been consumed and we may continue. If not, retry.
501 } while (other_info
->ai
.size
&& other_info
->ai
.ptr
== ai
->ptr
&&
502 other_info
->task
== current
);
504 set_current_state(TASK_RUNNING
);
507 /* Populate @other_info; requires that the provided @other_info not in use. */
508 static void prepare_report_producer(unsigned long *flags
,
509 const struct access_info
*ai
,
510 struct other_info
*other_info
)
512 raw_spin_lock_irqsave(&report_lock
, *flags
);
515 * The same @other_infos entry cannot be used concurrently, because
516 * there is a one-to-one mapping to watchpoint slots (@watchpoints in
517 * core.c), and a watchpoint is only released for reuse after reporting
518 * is done by the consumer of @other_info. Therefore, it is impossible
519 * for another concurrent prepare_report_producer() to set the same
520 * @other_info, and are guaranteed exclusivity for the @other_infos
521 * entry pointed to by @other_info.
523 * To check this property holds, size should never be non-zero here,
524 * because every consumer of struct other_info resets size to 0 in
527 WARN_ON(other_info
->ai
.size
);
529 other_info
->ai
= *ai
;
530 other_info
->num_stack_entries
= stack_trace_save(other_info
->stack_entries
, NUM_STACK_ENTRIES
, 2);
532 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE
))
533 set_other_info_task_blocking(flags
, ai
, other_info
);
535 raw_spin_unlock_irqrestore(&report_lock
, *flags
);
538 /* Awaits producer to fill @other_info and then returns. */
539 static bool prepare_report_consumer(unsigned long *flags
,
540 const struct access_info
*ai
,
541 struct other_info
*other_info
)
544 raw_spin_lock_irqsave(&report_lock
, *flags
);
545 while (!other_info
->ai
.size
) { /* Await valid @other_info. */
546 raw_spin_unlock_irqrestore(&report_lock
, *flags
);
548 raw_spin_lock_irqsave(&report_lock
, *flags
);
551 /* Should always have a matching access based on watchpoint encoding. */
552 if (WARN_ON(!matching_access((unsigned long)other_info
->ai
.ptr
& WATCHPOINT_ADDR_MASK
, other_info
->ai
.size
,
553 (unsigned long)ai
->ptr
& WATCHPOINT_ADDR_MASK
, ai
->size
)))
556 if (!matching_access((unsigned long)other_info
->ai
.ptr
, other_info
->ai
.size
,
557 (unsigned long)ai
->ptr
, ai
->size
)) {
559 * If the actual accesses to not match, this was a false
560 * positive due to watchpoint encoding.
562 atomic_long_inc(&kcsan_counters
[KCSAN_COUNTER_ENCODING_FALSE_POSITIVES
]);
569 release_report(flags
, other_info
);
574 * Depending on the report type either sets @other_info and returns false, or
575 * awaits @other_info and returns true. If @other_info is not required for the
576 * report type, simply acquires @report_lock and returns true.
578 static noinline
bool prepare_report(unsigned long *flags
,
579 enum kcsan_report_type type
,
580 const struct access_info
*ai
,
581 struct other_info
*other_info
)
584 case KCSAN_REPORT_CONSUMED_WATCHPOINT
:
585 prepare_report_producer(flags
, ai
, other_info
);
587 case KCSAN_REPORT_RACE_SIGNAL
:
588 return prepare_report_consumer(flags
, ai
, other_info
);
590 /* @other_info not required; just acquire @report_lock. */
591 raw_spin_lock_irqsave(&report_lock
, *flags
);
596 void kcsan_report(const volatile void *ptr
, size_t size
, int access_type
,
597 enum kcsan_value_change value_change
,
598 enum kcsan_report_type type
, int watchpoint_idx
)
600 unsigned long flags
= 0;
601 const struct access_info ai
= {
604 .access_type
= access_type
,
605 .task_pid
= in_task() ? task_pid_nr(current
) : -1,
606 .cpu_id
= raw_smp_processor_id()
608 struct other_info
*other_info
= type
== KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
609 ? NULL
: &other_infos
[watchpoint_idx
];
611 kcsan_disable_current();
612 if (WARN_ON(watchpoint_idx
< 0 || watchpoint_idx
>= ARRAY_SIZE(other_infos
)))
616 * Because we may generate reports when we're in scheduler code, the use
617 * of printk() could deadlock. Until such time that all printing code
618 * called in print_report() is scheduler-safe, accept the risk, and just
619 * get our message out. As such, also disable lockdep to hide the
620 * warning, and avoid disabling lockdep for the rest of the kernel.
624 if (prepare_report(&flags
, type
, &ai
, other_info
)) {
626 * Never report if value_change is FALSE, only if we it is
627 * either TRUE or MAYBE. In case of MAYBE, further filtering may
628 * be done once we know the full stack trace in print_report().
630 bool reported
= value_change
!= KCSAN_VALUE_CHANGE_FALSE
&&
631 print_report(value_change
, type
, &ai
, other_info
);
633 if (reported
&& panic_on_warn
)
634 panic("panic_on_warn set ...\n");
636 release_report(&flags
, other_info
);
641 kcsan_enable_current();