Merge tag 'io_uring-5.11-2021-01-16' of git://git.kernel.dk/linux-block
[linux/fpc-iii.git] / kernel / kcsan / report.c
blobd3bf87e6007ca7d555794606b6d56e90dbdaf363
1 // SPDX-License-Identifier: GPL-2.0
3 #include <linux/debug_locks.h>
4 #include <linux/delay.h>
5 #include <linux/jiffies.h>
6 #include <linux/kernel.h>
7 #include <linux/lockdep.h>
8 #include <linux/preempt.h>
9 #include <linux/printk.h>
10 #include <linux/sched.h>
11 #include <linux/spinlock.h>
12 #include <linux/stacktrace.h>
14 #include "kcsan.h"
15 #include "encoding.h"
18 * Max. number of stack entries to show in the report.
20 #define NUM_STACK_ENTRIES 64
22 /* Common access info. */
23 struct access_info {
24 const volatile void *ptr;
25 size_t size;
26 int access_type;
27 int task_pid;
28 int cpu_id;
32 * Other thread info: communicated from other racing thread to thread that set
33 * up the watchpoint, which then prints the complete report atomically.
35 struct other_info {
36 struct access_info ai;
37 unsigned long stack_entries[NUM_STACK_ENTRIES];
38 int num_stack_entries;
41 * Optionally pass @current. Typically we do not need to pass @current
42 * via @other_info since just @task_pid is sufficient. Passing @current
43 * has additional overhead.
45 * To safely pass @current, we must either use get_task_struct/
46 * put_task_struct, or stall the thread that populated @other_info.
48 * We cannot rely on get_task_struct/put_task_struct in case
49 * release_report() races with a task being released, and would have to
50 * free it in release_report(). This may result in deadlock if we want
51 * to use KCSAN on the allocators.
53 * Since we also want to reliably print held locks for
54 * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread
55 * that populated @other_info until it has been consumed.
57 struct task_struct *task;
61 * To never block any producers of struct other_info, we need as many elements
62 * as we have watchpoints (upper bound on concurrent races to report).
64 static struct other_info other_infos[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];
67 * Information about reported races; used to rate limit reporting.
69 struct report_time {
71 * The last time the race was reported.
73 unsigned long time;
76 * The frames of the 2 threads; if only 1 thread is known, one frame
77 * will be 0.
79 unsigned long frame1;
80 unsigned long frame2;
84 * Since we also want to be able to debug allocators with KCSAN, to avoid
85 * deadlock, report_times cannot be dynamically resized with krealloc in
86 * rate_limit_report.
88 * Therefore, we use a fixed-size array, which at most will occupy a page. This
89 * still adequately rate limits reports, assuming that a) number of unique data
90 * races is not excessive, and b) occurrence of unique races within the
91 * same time window is limited.
93 #define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time))
94 #define REPORT_TIMES_SIZE \
95 (CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ? \
96 REPORT_TIMES_MAX : \
97 CONFIG_KCSAN_REPORT_ONCE_IN_MS)
98 static struct report_time report_times[REPORT_TIMES_SIZE];
101 * Spinlock serializing report generation, and access to @other_infos. Although
102 * it could make sense to have a finer-grained locking story for @other_infos,
103 * report generation needs to be serialized either way, so not much is gained.
105 static DEFINE_RAW_SPINLOCK(report_lock);
108 * Checks if the race identified by thread frames frame1 and frame2 has
109 * been reported since (now - KCSAN_REPORT_ONCE_IN_MS).
111 static bool rate_limit_report(unsigned long frame1, unsigned long frame2)
113 struct report_time *use_entry = &report_times[0];
114 unsigned long invalid_before;
115 int i;
117 BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS != 0 && REPORT_TIMES_SIZE == 0);
119 if (CONFIG_KCSAN_REPORT_ONCE_IN_MS == 0)
120 return false;
122 invalid_before = jiffies - msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS);
124 /* Check if a matching race report exists. */
125 for (i = 0; i < REPORT_TIMES_SIZE; ++i) {
126 struct report_time *rt = &report_times[i];
129 * Must always select an entry for use to store info as we
130 * cannot resize report_times; at the end of the scan, use_entry
131 * will be the oldest entry, which ideally also happened before
132 * KCSAN_REPORT_ONCE_IN_MS ago.
134 if (time_before(rt->time, use_entry->time))
135 use_entry = rt;
138 * Initially, no need to check any further as this entry as well
139 * as following entries have never been used.
141 if (rt->time == 0)
142 break;
144 /* Check if entry expired. */
145 if (time_before(rt->time, invalid_before))
146 continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */
148 /* Reported recently, check if race matches. */
149 if ((rt->frame1 == frame1 && rt->frame2 == frame2) ||
150 (rt->frame1 == frame2 && rt->frame2 == frame1))
151 return true;
154 use_entry->time = jiffies;
155 use_entry->frame1 = frame1;
156 use_entry->frame2 = frame2;
157 return false;
161 * Special rules to skip reporting.
163 static bool
164 skip_report(enum kcsan_value_change value_change, unsigned long top_frame)
166 /* Should never get here if value_change==FALSE. */
167 WARN_ON_ONCE(value_change == KCSAN_VALUE_CHANGE_FALSE);
170 * The first call to skip_report always has value_change==TRUE, since we
171 * cannot know the value written of an instrumented access. For the 2nd
172 * call there are 6 cases with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY:
174 * 1. read watchpoint, conflicting write (value_change==TRUE): report;
175 * 2. read watchpoint, conflicting write (value_change==MAYBE): skip;
176 * 3. write watchpoint, conflicting write (value_change==TRUE): report;
177 * 4. write watchpoint, conflicting write (value_change==MAYBE): skip;
178 * 5. write watchpoint, conflicting read (value_change==MAYBE): skip;
179 * 6. write watchpoint, conflicting read (value_change==TRUE): report;
181 * Cases 1-4 are intuitive and expected; case 5 ensures we do not report
182 * data races where the write may have rewritten the same value; case 6
183 * is possible either if the size is larger than what we check value
184 * changes for or the access type is KCSAN_ACCESS_ASSERT.
186 if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) &&
187 value_change == KCSAN_VALUE_CHANGE_MAYBE) {
189 * The access is a write, but the data value did not change.
191 * We opt-out of this filter for certain functions at request of
192 * maintainers.
194 char buf[64];
195 int len = scnprintf(buf, sizeof(buf), "%ps", (void *)top_frame);
197 if (!strnstr(buf, "rcu_", len) &&
198 !strnstr(buf, "_rcu", len) &&
199 !strnstr(buf, "_srcu", len))
200 return true;
203 return kcsan_skip_report_debugfs(top_frame);
206 static const char *get_access_type(int type)
208 if (type & KCSAN_ACCESS_ASSERT) {
209 if (type & KCSAN_ACCESS_SCOPED) {
210 if (type & KCSAN_ACCESS_WRITE)
211 return "assert no accesses (scoped)";
212 else
213 return "assert no writes (scoped)";
214 } else {
215 if (type & KCSAN_ACCESS_WRITE)
216 return "assert no accesses";
217 else
218 return "assert no writes";
222 switch (type) {
223 case 0:
224 return "read";
225 case KCSAN_ACCESS_ATOMIC:
226 return "read (marked)";
227 case KCSAN_ACCESS_WRITE:
228 return "write";
229 case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
230 return "write (marked)";
231 case KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE:
232 return "read-write";
233 case KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
234 return "read-write (marked)";
235 case KCSAN_ACCESS_SCOPED:
236 return "read (scoped)";
237 case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC:
238 return "read (marked, scoped)";
239 case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE:
240 return "write (scoped)";
241 case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
242 return "write (marked, scoped)";
243 default:
244 BUG();
248 static const char *get_bug_type(int type)
250 return (type & KCSAN_ACCESS_ASSERT) != 0 ? "assert: race" : "data-race";
253 /* Return thread description: in task or interrupt. */
254 static const char *get_thread_desc(int task_id)
256 if (task_id != -1) {
257 static char buf[32]; /* safe: protected by report_lock */
259 snprintf(buf, sizeof(buf), "task %i", task_id);
260 return buf;
262 return "interrupt";
265 /* Helper to skip KCSAN-related functions in stack-trace. */
266 static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries)
268 char buf[64];
269 char *cur;
270 int len, skip;
272 for (skip = 0; skip < num_entries; ++skip) {
273 len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]);
275 /* Never show tsan_* or {read,write}_once_size. */
276 if (strnstr(buf, "tsan_", len) ||
277 strnstr(buf, "_once_size", len))
278 continue;
280 cur = strnstr(buf, "kcsan_", len);
281 if (cur) {
282 cur += strlen("kcsan_");
283 if (!str_has_prefix(cur, "test"))
284 continue; /* KCSAN runtime function. */
285 /* KCSAN related test. */
289 * No match for runtime functions -- @skip entries to skip to
290 * get to first frame of interest.
292 break;
295 return skip;
298 /* Compares symbolized strings of addr1 and addr2. */
299 static int sym_strcmp(void *addr1, void *addr2)
301 char buf1[64];
302 char buf2[64];
304 snprintf(buf1, sizeof(buf1), "%pS", addr1);
305 snprintf(buf2, sizeof(buf2), "%pS", addr2);
307 return strncmp(buf1, buf2, sizeof(buf1));
310 static void print_verbose_info(struct task_struct *task)
312 if (!task)
313 return;
315 /* Restore IRQ state trace for printing. */
316 kcsan_restore_irqtrace(task);
318 pr_err("\n");
319 debug_show_held_locks(task);
320 print_irqtrace_events(task);
324 * Returns true if a report was generated, false otherwise.
326 static bool print_report(enum kcsan_value_change value_change,
327 enum kcsan_report_type type,
328 const struct access_info *ai,
329 const struct other_info *other_info)
331 unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 };
332 int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1);
333 int skipnr = get_stack_skipnr(stack_entries, num_stack_entries);
334 unsigned long this_frame = stack_entries[skipnr];
335 unsigned long other_frame = 0;
336 int other_skipnr = 0; /* silence uninit warnings */
339 * Must check report filter rules before starting to print.
341 if (skip_report(KCSAN_VALUE_CHANGE_TRUE, stack_entries[skipnr]))
342 return false;
344 if (type == KCSAN_REPORT_RACE_SIGNAL) {
345 other_skipnr = get_stack_skipnr(other_info->stack_entries,
346 other_info->num_stack_entries);
347 other_frame = other_info->stack_entries[other_skipnr];
349 /* @value_change is only known for the other thread */
350 if (skip_report(value_change, other_frame))
351 return false;
354 if (rate_limit_report(this_frame, other_frame))
355 return false;
357 /* Print report header. */
358 pr_err("==================================================================\n");
359 switch (type) {
360 case KCSAN_REPORT_RACE_SIGNAL: {
361 int cmp;
364 * Order functions lexographically for consistent bug titles.
365 * Do not print offset of functions to keep title short.
367 cmp = sym_strcmp((void *)other_frame, (void *)this_frame);
368 pr_err("BUG: KCSAN: %s in %ps / %ps\n",
369 get_bug_type(ai->access_type | other_info->ai.access_type),
370 (void *)(cmp < 0 ? other_frame : this_frame),
371 (void *)(cmp < 0 ? this_frame : other_frame));
372 } break;
374 case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
375 pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(ai->access_type),
376 (void *)this_frame);
377 break;
379 default:
380 BUG();
383 pr_err("\n");
385 /* Print information about the racing accesses. */
386 switch (type) {
387 case KCSAN_REPORT_RACE_SIGNAL:
388 pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
389 get_access_type(other_info->ai.access_type), other_info->ai.ptr,
390 other_info->ai.size, get_thread_desc(other_info->ai.task_pid),
391 other_info->ai.cpu_id);
393 /* Print the other thread's stack trace. */
394 stack_trace_print(other_info->stack_entries + other_skipnr,
395 other_info->num_stack_entries - other_skipnr,
398 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
399 print_verbose_info(other_info->task);
401 pr_err("\n");
402 pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
403 get_access_type(ai->access_type), ai->ptr, ai->size,
404 get_thread_desc(ai->task_pid), ai->cpu_id);
405 break;
407 case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
408 pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n",
409 get_access_type(ai->access_type), ai->ptr, ai->size,
410 get_thread_desc(ai->task_pid), ai->cpu_id);
411 break;
413 default:
414 BUG();
416 /* Print stack trace of this thread. */
417 stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr,
420 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
421 print_verbose_info(current);
423 /* Print report footer. */
424 pr_err("\n");
425 pr_err("Reported by Kernel Concurrency Sanitizer on:\n");
426 dump_stack_print_info(KERN_DEFAULT);
427 pr_err("==================================================================\n");
429 return true;
432 static void release_report(unsigned long *flags, struct other_info *other_info)
434 if (other_info)
436 * Use size to denote valid/invalid, since KCSAN entirely
437 * ignores 0-sized accesses.
439 other_info->ai.size = 0;
441 raw_spin_unlock_irqrestore(&report_lock, *flags);
445 * Sets @other_info->task and awaits consumption of @other_info.
447 * Precondition: report_lock is held.
448 * Postcondition: report_lock is held.
450 static void set_other_info_task_blocking(unsigned long *flags,
451 const struct access_info *ai,
452 struct other_info *other_info)
455 * We may be instrumenting a code-path where current->state is already
456 * something other than TASK_RUNNING.
458 const bool is_running = current->state == TASK_RUNNING;
460 * To avoid deadlock in case we are in an interrupt here and this is a
461 * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a
462 * timeout to ensure this works in all contexts.
464 * Await approximately the worst case delay of the reporting thread (if
465 * we are not interrupted).
467 int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt);
469 other_info->task = current;
470 do {
471 if (is_running) {
473 * Let lockdep know the real task is sleeping, to print
474 * the held locks (recall we turned lockdep off, so
475 * locking/unlocking @report_lock won't be recorded).
477 set_current_state(TASK_UNINTERRUPTIBLE);
479 raw_spin_unlock_irqrestore(&report_lock, *flags);
481 * We cannot call schedule() since we also cannot reliably
482 * determine if sleeping here is permitted -- see in_atomic().
485 udelay(1);
486 raw_spin_lock_irqsave(&report_lock, *flags);
487 if (timeout-- < 0) {
489 * Abort. Reset @other_info->task to NULL, since it
490 * appears the other thread is still going to consume
491 * it. It will result in no verbose info printed for
492 * this task.
494 other_info->task = NULL;
495 break;
498 * If invalid, or @ptr nor @current matches, then @other_info
499 * has been consumed and we may continue. If not, retry.
501 } while (other_info->ai.size && other_info->ai.ptr == ai->ptr &&
502 other_info->task == current);
503 if (is_running)
504 set_current_state(TASK_RUNNING);
507 /* Populate @other_info; requires that the provided @other_info not in use. */
508 static void prepare_report_producer(unsigned long *flags,
509 const struct access_info *ai,
510 struct other_info *other_info)
512 raw_spin_lock_irqsave(&report_lock, *flags);
515 * The same @other_infos entry cannot be used concurrently, because
516 * there is a one-to-one mapping to watchpoint slots (@watchpoints in
517 * core.c), and a watchpoint is only released for reuse after reporting
518 * is done by the consumer of @other_info. Therefore, it is impossible
519 * for another concurrent prepare_report_producer() to set the same
520 * @other_info, and are guaranteed exclusivity for the @other_infos
521 * entry pointed to by @other_info.
523 * To check this property holds, size should never be non-zero here,
524 * because every consumer of struct other_info resets size to 0 in
525 * release_report().
527 WARN_ON(other_info->ai.size);
529 other_info->ai = *ai;
530 other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 2);
532 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
533 set_other_info_task_blocking(flags, ai, other_info);
535 raw_spin_unlock_irqrestore(&report_lock, *flags);
538 /* Awaits producer to fill @other_info and then returns. */
539 static bool prepare_report_consumer(unsigned long *flags,
540 const struct access_info *ai,
541 struct other_info *other_info)
544 raw_spin_lock_irqsave(&report_lock, *flags);
545 while (!other_info->ai.size) { /* Await valid @other_info. */
546 raw_spin_unlock_irqrestore(&report_lock, *flags);
547 cpu_relax();
548 raw_spin_lock_irqsave(&report_lock, *flags);
551 /* Should always have a matching access based on watchpoint encoding. */
552 if (WARN_ON(!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size,
553 (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size)))
554 goto discard;
556 if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size,
557 (unsigned long)ai->ptr, ai->size)) {
559 * If the actual accesses to not match, this was a false
560 * positive due to watchpoint encoding.
562 atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ENCODING_FALSE_POSITIVES]);
563 goto discard;
566 return true;
568 discard:
569 release_report(flags, other_info);
570 return false;
574 * Depending on the report type either sets @other_info and returns false, or
575 * awaits @other_info and returns true. If @other_info is not required for the
576 * report type, simply acquires @report_lock and returns true.
578 static noinline bool prepare_report(unsigned long *flags,
579 enum kcsan_report_type type,
580 const struct access_info *ai,
581 struct other_info *other_info)
583 switch (type) {
584 case KCSAN_REPORT_CONSUMED_WATCHPOINT:
585 prepare_report_producer(flags, ai, other_info);
586 return false;
587 case KCSAN_REPORT_RACE_SIGNAL:
588 return prepare_report_consumer(flags, ai, other_info);
589 default:
590 /* @other_info not required; just acquire @report_lock. */
591 raw_spin_lock_irqsave(&report_lock, *flags);
592 return true;
596 void kcsan_report(const volatile void *ptr, size_t size, int access_type,
597 enum kcsan_value_change value_change,
598 enum kcsan_report_type type, int watchpoint_idx)
600 unsigned long flags = 0;
601 const struct access_info ai = {
602 .ptr = ptr,
603 .size = size,
604 .access_type = access_type,
605 .task_pid = in_task() ? task_pid_nr(current) : -1,
606 .cpu_id = raw_smp_processor_id()
608 struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
609 ? NULL : &other_infos[watchpoint_idx];
611 kcsan_disable_current();
612 if (WARN_ON(watchpoint_idx < 0 || watchpoint_idx >= ARRAY_SIZE(other_infos)))
613 goto out;
616 * Because we may generate reports when we're in scheduler code, the use
617 * of printk() could deadlock. Until such time that all printing code
618 * called in print_report() is scheduler-safe, accept the risk, and just
619 * get our message out. As such, also disable lockdep to hide the
620 * warning, and avoid disabling lockdep for the rest of the kernel.
622 lockdep_off();
624 if (prepare_report(&flags, type, &ai, other_info)) {
626 * Never report if value_change is FALSE, only if we it is
627 * either TRUE or MAYBE. In case of MAYBE, further filtering may
628 * be done once we know the full stack trace in print_report().
630 bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE &&
631 print_report(value_change, type, &ai, other_info);
633 if (reported && panic_on_warn)
634 panic("panic_on_warn set ...\n");
636 release_report(&flags, other_info);
639 lockdep_on();
640 out:
641 kcsan_enable_current();